Merge tag 'drm-next-5.4-2019-08-09' of git://people.freedesktop.org/~agd5f/linux into drm-next
drm-next-5.4-2019-08-09:

Same as drm-next-5.4-2019-08-06, but with the readq/writeq stuff fixed and 5.3-rc3 backmerged.

amdgpu:
- Add navi14 support
- Add navi12 support
- Add Arcturus support
- Enable mclk DPM for Navi
- Misc DC display fixes
- Add perfmon support for DF
- Add scatter/gather display support for Raven
- Improve SMU handling for GPU reset
- RAS support for GFX
- Drop last of drmP.h
- Add support for wiping memory on buffer release
- Allow cursor async updates for fb swaps
- Misc fixes and cleanups

amdkfd:
- Add navi14 support
- Add navi12 support
- Add Arcturus support
- CWSR trap handlers updates for gfx9, 10
- Drop last of drmP.h
- Update MAINTAINERS

radeon:
- Misc fixes and cleanups
- Make kexec more reliable by tearing down the GPU

ttm:
- Add release_notify callback

uapi:
- Add wipe memory on release flag for buffer creation (see the usage sketch below)

Signed-off-by: Dave Airlie <airlied@redhat.com>
[airlied: resolved conflicts with ttm resv moving]
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190809184807.3381-1-alexander.deucher@amd.com
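One user-visible piece of this pull is the new wipe-on-release buffer flag mentioned in the uapi bullet above; it is added to the amdgpu uapi and consumed by the KFD VRAM allocation path later in this diff (AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE). Below is a minimal, hypothetical userspace sketch of how the flag could be passed through the GEM create ioctl. The render-node path, buffer size, and lack of error handling are illustrative only, and the include path depends on how libdrm installs amdgpu_drm.h; this is not part of the merged series itself.

```c
/*
 * Hypothetical sketch: ask the kernel to wipe a VRAM BO when it is released,
 * using the AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE flag added in this series.
 * Device node and sizes are illustrative, not taken from the patch.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <libdrm/amdgpu_drm.h>	/* include path depends on the libdrm install */

int main(void)
{
	union drm_amdgpu_gem_create args;
	int fd = open("/dev/dri/renderD128", O_RDWR);	/* illustrative render node */

	if (fd < 0)
		return 1;

	memset(&args, 0, sizeof(args));
	args.in.bo_size      = 1 << 20;			/* 1 MiB buffer */
	args.in.alignment    = 4096;
	args.in.domains      = AMDGPU_GEM_DOMAIN_VRAM;
	args.in.domain_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;

	if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args) == 0)
		printf("BO handle %u will be cleared on release\n", args.out.handle);

	close(fd);
	return 0;
}
```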
commit e7f7287bf5

MAINTAINERS | 14
MAINTAINERS

@@ -829,17 +829,11 @@ F: drivers/iommu/amd_iommu*.[ch]
F: include/linux/amd-iommu.h

AMD KFD
M: Oded Gabbay <oded.gabbay@gmail.com>
L: dri-devel@lists.freedesktop.org
T: git git://people.freedesktop.org/~gabbayo/linux.git
M: Felix Kuehling <Felix.Kuehling@amd.com>
L: amd-gfx@lists.freedesktop.org
T: git git://people.freedesktop.org/~agd5f/linux
S: Supported
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd*.[ch]
F: drivers/gpu/drm/amd/amdkfd/
F: drivers/gpu/drm/amd/include/cik_structs.h
F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h

drivers/gpu/drm/amd/amdgpu/Makefile

@@ -66,7 +66,8 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce

amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \
arct_reg_init.o navi12_reg_init.o

# add DF block
amdgpu-y += \

@@ -77,9 +78,13 @@ amdgpu-y += \
amdgpu-y += \
gmc_v7_0.o \
gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o

# add UMC block
amdgpu-y += \
umc_v6_1.o

# add IH block
amdgpu-y += \
amdgpu_irq.o \

@@ -144,7 +149,8 @@ amdgpu-y += \
amdgpu-y += \
amdgpu_vcn.o \
vcn_v1_0.o \
vcn_v2_0.o
vcn_v2_0.o \
vcn_v2_5.o

# add ATHUB block
amdgpu-y += \

@@ -162,6 +168,7 @@ amdgpu-y += \
amdgpu_amdkfd_gpuvm.o \
amdgpu_amdkfd_gfx_v8.o \
amdgpu_amdkfd_gfx_v9.o \
amdgpu_amdkfd_arcturus.o \
amdgpu_amdkfd_gfx_v10.o

ifneq ($(CONFIG_DRM_AMDGPU_CIK),)

drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -86,6 +86,7 @@
#include "amdgpu_smu.h"
#include "amdgpu_discovery.h"
#include "amdgpu_mes.h"
#include "amdgpu_umc.h"

#define MAX_GPU_INSTANCE 16

@@ -532,6 +533,14 @@ struct amdgpu_allowed_register_entry {
bool grbm_indexed;
};

enum amd_reset_method {
AMD_RESET_METHOD_LEGACY = 0,
AMD_RESET_METHOD_MODE0,
AMD_RESET_METHOD_MODE1,
AMD_RESET_METHOD_MODE2,
AMD_RESET_METHOD_BACO
};

/*
* ASIC specific functions.
*/

@@ -543,6 +552,7 @@ struct amdgpu_asic_funcs {
u32 sh_num, u32 reg_offset, u32 *value);
void (*set_vga_state)(struct amdgpu_device *adev, bool state);
int (*reset)(struct amdgpu_device *adev);
enum amd_reset_method (*reset_method)(struct amdgpu_device *adev);
/* get the reference clock */
u32 (*get_xclk)(struct amdgpu_device *adev);
/* MM block clocks */

@@ -627,6 +637,9 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);

typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);

typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);

@@ -648,6 +661,12 @@ struct nbio_hdp_flush_reg {
u32 ref_and_mask_cp9;
u32 ref_and_mask_sdma0;
u32 ref_and_mask_sdma1;
u32 ref_and_mask_sdma2;
u32 ref_and_mask_sdma3;
u32 ref_and_mask_sdma4;
u32 ref_and_mask_sdma5;
u32 ref_and_mask_sdma6;
u32 ref_and_mask_sdma7;
};

struct amdgpu_mmio_remap {

@@ -668,7 +687,7 @@ struct amdgpu_nbio_funcs {
void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
bool use_doorbell, int doorbell_index, int doorbell_size);
void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
int doorbell_index);
int doorbell_index, int instance);
void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
bool enable);
void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,

@@ -705,6 +724,9 @@ struct amdgpu_df_funcs {
int is_disable);
void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
uint64_t *count);
uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
uint32_t ficadl_val, uint32_t ficadh_val);
};
/* Define the HW IP blocks will be used in driver , add more if necessary */
enum amd_hw_ip_block_type {

@@ -712,6 +734,12 @@ enum amd_hw_ip_block_type {
HDP_HWIP,
SDMA0_HWIP,
SDMA1_HWIP,
SDMA2_HWIP,
SDMA3_HWIP,
SDMA4_HWIP,
SDMA5_HWIP,
SDMA6_HWIP,
SDMA7_HWIP,
MMHUB_HWIP,
ATHUB_HWIP,
NBIO_HWIP,

@@ -728,10 +756,12 @@ enum amd_hw_ip_block_type {
NBIF_HWIP,
THM_HWIP,
CLK_HWIP,
UMC_HWIP,
RSMU_HWIP,
MAX_HWIP
};

#define HWIP_MAX_INSTANCE 6
#define HWIP_MAX_INSTANCE 8

struct amd_powerplay {
void *pp_handle;

@@ -803,6 +833,8 @@ struct amdgpu_device {
amdgpu_wreg_t pcie_wreg;
amdgpu_rreg_t pciep_rreg;
amdgpu_wreg_t pciep_wreg;
amdgpu_rreg64_t pcie_rreg64;
amdgpu_wreg64_t pcie_wreg64;
/* protects concurrent UVD register access */
spinlock_t uvd_ctx_idx_lock;
amdgpu_rreg_t uvd_ctx_rreg;

@@ -836,6 +868,7 @@ struct amdgpu_device {
dma_addr_t dummy_page_addr;
struct amdgpu_vm_manager vm_manager;
struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
unsigned num_vmhubs;

/* memory management */
struct amdgpu_mman mman;

@@ -915,6 +948,9 @@ struct amdgpu_device {
/* KFD */
struct amdgpu_kfd_dev kfd;

/* UMC */
struct amdgpu_umc umc;

/* display related functionality */
struct amdgpu_display_manager dm;

@@ -965,6 +1001,7 @@ struct amdgpu_device {
/* record last mm index being written through WREG32*/
unsigned long last_mm_index;
bool in_gpu_reset;
enum pp_mp1_state mp1_state;
struct mutex lock_reset;
struct amdgpu_doorbell_index doorbell_index;

@@ -1033,6 +1070,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
#define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))

@@ -1093,6 +1132,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
*/
#define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
#define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev))
#define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
#define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
#define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

@@ -87,7 +87,12 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
case CHIP_RAVEN:
kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
break;
case CHIP_ARCTURUS:
kfd2kgd = amdgpu_amdkfd_arcturus_get_functions();
break;
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions();
break;
default:

@@ -651,8 +656,12 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

if (adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->switch_power_profile)
if (is_support_sw_smu(adev))
smu_switch_power_profile(&adev->smu,
PP_SMC_POWER_PROFILE_COMPUTE,
!idle);
else if (adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->switch_power_profile)
amdgpu_dpm_switch_power_profile(adev,
PP_SMC_POWER_PROFILE_COMPUTE,
!idle);

@@ -715,6 +724,11 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
return NULL;
}

struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void)
{
return NULL;
}

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void)
{
return NULL;

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

@@ -140,6 +140,7 @@ bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void);

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);

|
|
@ -0,0 +1,323 @@
|
|||
/*
|
||||
* Copyright 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "kfd2kgd: " fmt
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/fdtable.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <linux/firmware.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "sdma0/sdma0_4_2_2_offset.h"
|
||||
#include "sdma0/sdma0_4_2_2_sh_mask.h"
|
||||
#include "sdma1/sdma1_4_2_2_offset.h"
|
||||
#include "sdma1/sdma1_4_2_2_sh_mask.h"
|
||||
#include "sdma2/sdma2_4_2_2_offset.h"
|
||||
#include "sdma2/sdma2_4_2_2_sh_mask.h"
|
||||
#include "sdma3/sdma3_4_2_2_offset.h"
|
||||
#include "sdma3/sdma3_4_2_2_sh_mask.h"
|
||||
#include "sdma4/sdma4_4_2_2_offset.h"
|
||||
#include "sdma4/sdma4_4_2_2_sh_mask.h"
|
||||
#include "sdma5/sdma5_4_2_2_offset.h"
|
||||
#include "sdma5/sdma5_4_2_2_sh_mask.h"
|
||||
#include "sdma6/sdma6_4_2_2_offset.h"
|
||||
#include "sdma6/sdma6_4_2_2_sh_mask.h"
|
||||
#include "sdma7/sdma7_4_2_2_offset.h"
|
||||
#include "sdma7/sdma7_4_2_2_sh_mask.h"
|
||||
#include "v9_structs.h"
|
||||
#include "soc15.h"
|
||||
#include "soc15d.h"
|
||||
#include "amdgpu_amdkfd_gfx_v9.h"
|
||||
|
||||
#define HQD_N_REGS 56
|
||||
#define DUMP_REG(addr) do { \
|
||||
if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
|
||||
break; \
|
||||
(*dump)[i][0] = (addr) << 2; \
|
||||
(*dump)[i++][1] = RREG32(addr); \
|
||||
} while (0)
|
||||
|
||||
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
|
||||
{
|
||||
return (struct amdgpu_device *)kgd;
|
||||
}
|
||||
|
||||
static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
|
||||
{
|
||||
return (struct v9_sdma_mqd *)mqd;
|
||||
}
|
||||
|
||||
static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
|
||||
unsigned int engine_id,
|
||||
unsigned int queue_id)
|
||||
{
|
||||
uint32_t base[8] = {
|
||||
SOC15_REG_OFFSET(SDMA0, 0,
|
||||
mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
|
||||
SOC15_REG_OFFSET(SDMA1, 0,
|
||||
mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL,
|
||||
SOC15_REG_OFFSET(SDMA2, 0,
|
||||
mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL,
|
||||
SOC15_REG_OFFSET(SDMA3, 0,
|
||||
mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL,
|
||||
SOC15_REG_OFFSET(SDMA4, 0,
|
||||
mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL,
|
||||
SOC15_REG_OFFSET(SDMA5, 0,
|
||||
mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL,
|
||||
SOC15_REG_OFFSET(SDMA6, 0,
|
||||
mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL,
|
||||
SOC15_REG_OFFSET(SDMA7, 0,
|
||||
mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL
|
||||
};
|
||||
uint32_t retval;
|
||||
|
||||
retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
|
||||
mmSDMA0_RLC0_RB_CNTL);
|
||||
|
||||
pr_debug("sdma base address: 0x%x\n", retval);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
|
||||
u32 instance, u32 offset)
|
||||
{
|
||||
switch (instance) {
|
||||
case 0:
|
||||
return (adev->reg_offset[SDMA0_HWIP][0][0] + offset);
|
||||
case 1:
|
||||
return (adev->reg_offset[SDMA1_HWIP][0][1] + offset);
|
||||
case 2:
|
||||
return (adev->reg_offset[SDMA2_HWIP][0][1] + offset);
|
||||
case 3:
|
||||
return (adev->reg_offset[SDMA3_HWIP][0][1] + offset);
|
||||
case 4:
|
||||
return (adev->reg_offset[SDMA4_HWIP][0][1] + offset);
|
||||
case 5:
|
||||
return (adev->reg_offset[SDMA5_HWIP][0][1] + offset);
|
||||
case 6:
|
||||
return (adev->reg_offset[SDMA6_HWIP][0][1] + offset);
|
||||
case 7:
|
||||
return (adev->reg_offset[SDMA7_HWIP][0][1] + offset);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
|
||||
uint32_t __user *wptr, struct mm_struct *mm)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
struct v9_sdma_mqd *m;
|
||||
uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
|
||||
unsigned long end_jiffies;
|
||||
uint32_t data;
|
||||
uint64_t data64;
|
||||
uint64_t __user *wptr64 = (uint64_t __user *)wptr;
|
||||
|
||||
m = get_sdma_mqd(mqd);
|
||||
sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
|
||||
m->sdma_queue_id);
|
||||
sdmax_gfx_context_cntl = sdma_v4_0_get_reg_offset(adev,
|
||||
m->sdma_engine_id, mmSDMA0_GFX_CONTEXT_CNTL);
|
||||
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
|
||||
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
|
||||
|
||||
end_jiffies = msecs_to_jiffies(2000) + jiffies;
|
||||
while (true) {
|
||||
data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
|
||||
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
|
||||
break;
|
||||
if (time_after(jiffies, end_jiffies))
|
||||
return -ETIME;
|
||||
usleep_range(500, 1000);
|
||||
}
|
||||
data = RREG32(sdmax_gfx_context_cntl);
|
||||
data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
|
||||
RESUME_CTX, 0);
|
||||
WREG32(sdmax_gfx_context_cntl, data);
|
||||
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
|
||||
m->sdmax_rlcx_doorbell_offset);
|
||||
|
||||
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
|
||||
ENABLE, 1);
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
|
||||
m->sdmax_rlcx_rb_rptr_hi);
|
||||
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
|
||||
if (read_user_wptr(mm, wptr64, data64)) {
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
|
||||
lower_32_bits(data64));
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
|
||||
upper_32_bits(data64));
|
||||
} else {
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
|
||||
m->sdmax_rlcx_rb_rptr);
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
|
||||
m->sdmax_rlcx_rb_rptr_hi);
|
||||
}
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
|
||||
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
|
||||
m->sdmax_rlcx_rb_base_hi);
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
|
||||
m->sdmax_rlcx_rb_rptr_addr_lo);
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
|
||||
m->sdmax_rlcx_rb_rptr_addr_hi);
|
||||
|
||||
data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
|
||||
RB_ENABLE, 1);
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
|
||||
uint32_t engine_id, uint32_t queue_id,
|
||||
uint32_t (**dump)[2], uint32_t *n_regs)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
|
||||
uint32_t i = 0, reg;
|
||||
#undef HQD_N_REGS
|
||||
#define HQD_N_REGS (19+6+7+10)
|
||||
|
||||
*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
|
||||
if (*dump == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
|
||||
DUMP_REG(sdma_base_addr + reg);
|
||||
for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
|
||||
DUMP_REG(sdma_base_addr + reg);
|
||||
for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
|
||||
reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
|
||||
DUMP_REG(sdma_base_addr + reg);
|
||||
for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
|
||||
reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
|
||||
DUMP_REG(sdma_base_addr + reg);
|
||||
|
||||
WARN_ON_ONCE(i != HQD_N_REGS);
|
||||
*n_regs = i;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
struct v9_sdma_mqd *m;
|
||||
uint32_t sdma_base_addr;
|
||||
uint32_t sdma_rlc_rb_cntl;
|
||||
|
||||
m = get_sdma_mqd(mqd);
|
||||
sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
|
||||
m->sdma_queue_id);
|
||||
|
||||
sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
|
||||
|
||||
if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
|
||||
unsigned int utimeout)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
struct v9_sdma_mqd *m;
|
||||
uint32_t sdma_base_addr;
|
||||
uint32_t temp;
|
||||
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
|
||||
|
||||
m = get_sdma_mqd(mqd);
|
||||
sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
|
||||
m->sdma_queue_id);
|
||||
|
||||
temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
|
||||
temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
|
||||
|
||||
while (true) {
|
||||
temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
|
||||
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
|
||||
break;
|
||||
if (time_after(jiffies, end_jiffies))
|
||||
return -ETIME;
|
||||
usleep_range(500, 1000);
|
||||
}
|
||||
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
|
||||
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
|
||||
RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
|
||||
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
|
||||
|
||||
m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
|
||||
m->sdmax_rlcx_rb_rptr_hi =
|
||||
RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
|
||||
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
|
||||
.init_interrupts = kgd_gfx_v9_init_interrupts,
|
||||
.hqd_load = kgd_gfx_v9_hqd_load,
|
||||
.hqd_sdma_load = kgd_hqd_sdma_load,
|
||||
.hqd_dump = kgd_gfx_v9_hqd_dump,
|
||||
.hqd_sdma_dump = kgd_hqd_sdma_dump,
|
||||
.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
|
||||
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
|
||||
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
|
||||
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
|
||||
.address_watch_disable = kgd_gfx_v9_address_watch_disable,
|
||||
.address_watch_execute = kgd_gfx_v9_address_watch_execute,
|
||||
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
|
||||
.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
|
||||
.get_atc_vmid_pasid_mapping_pasid =
|
||||
kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
|
||||
.get_atc_vmid_pasid_mapping_valid =
|
||||
kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
|
||||
.set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va,
|
||||
.get_tile_config = kgd_gfx_v9_get_tile_config,
|
||||
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
|
||||
.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
|
||||
.get_hive_id = amdgpu_amdkfd_get_hive_id,
|
||||
};
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void)
|
||||
{
|
||||
return (struct kfd2kgd_calls *)&kfd2kgd;
|
||||
}
|
||||
|
|
@ -27,7 +27,6 @@
|
|||
#include <linux/uaccess.h>
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "amdgpu_ucode.h"
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
#include "soc15d.h"
|
||||
#include "mmhub_v1_0.h"
|
||||
#include "gfxhub_v1_0.h"
|
||||
#include "gmc_v9_0.h"
|
||||
|
||||
|
||||
#define V9_PIPE_PER_MEC (4)
|
||||
|
@ -58,66 +59,11 @@ enum hqd_dequeue_request_type {
|
|||
RESET_WAVES
|
||||
};
|
||||
|
||||
/*
|
||||
* Register access functions
|
||||
*/
|
||||
|
||||
static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
|
||||
uint32_t sh_mem_config,
|
||||
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
|
||||
uint32_t sh_mem_bases);
|
||||
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
|
||||
unsigned int vmid);
|
||||
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
|
||||
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
|
||||
uint32_t queue_id, uint32_t __user *wptr,
|
||||
uint32_t wptr_shift, uint32_t wptr_mask,
|
||||
struct mm_struct *mm);
|
||||
static int kgd_hqd_dump(struct kgd_dev *kgd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t (**dump)[2], uint32_t *n_regs);
|
||||
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
|
||||
uint32_t __user *wptr, struct mm_struct *mm);
|
||||
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
|
||||
uint32_t engine_id, uint32_t queue_id,
|
||||
uint32_t (**dump)[2], uint32_t *n_regs);
|
||||
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
|
||||
uint32_t pipe_id, uint32_t queue_id);
|
||||
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
|
||||
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
|
||||
enum kfd_preempt_type reset_type,
|
||||
unsigned int utimeout, uint32_t pipe_id,
|
||||
uint32_t queue_id);
|
||||
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
|
||||
unsigned int utimeout);
|
||||
static int kgd_address_watch_disable(struct kgd_dev *kgd);
|
||||
static int kgd_address_watch_execute(struct kgd_dev *kgd,
|
||||
unsigned int watch_point_id,
|
||||
uint32_t cntl_val,
|
||||
uint32_t addr_hi,
|
||||
uint32_t addr_lo);
|
||||
static int kgd_wave_control_execute(struct kgd_dev *kgd,
|
||||
uint32_t gfx_index_val,
|
||||
uint32_t sq_cmd);
|
||||
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
|
||||
unsigned int watch_point_id,
|
||||
unsigned int reg_offset);
|
||||
|
||||
static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
|
||||
uint8_t vmid);
|
||||
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
|
||||
uint8_t vmid);
|
||||
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
||||
uint64_t page_table_base);
|
||||
static void set_scratch_backing_va(struct kgd_dev *kgd,
|
||||
uint64_t va, uint32_t vmid);
|
||||
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
|
||||
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
|
||||
|
||||
/* Because of REG_GET_FIELD() being used, we put this function in the
|
||||
* asic specific file.
|
||||
*/
|
||||
static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
|
||||
int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
|
||||
struct tile_config *config)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
|
@ -135,39 +81,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.program_sh_mem_settings = kgd_program_sh_mem_settings,
|
||||
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
|
||||
.init_interrupts = kgd_init_interrupts,
|
||||
.hqd_load = kgd_hqd_load,
|
||||
.hqd_sdma_load = kgd_hqd_sdma_load,
|
||||
.hqd_dump = kgd_hqd_dump,
|
||||
.hqd_sdma_dump = kgd_hqd_sdma_dump,
|
||||
.hqd_is_occupied = kgd_hqd_is_occupied,
|
||||
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
|
||||
.hqd_destroy = kgd_hqd_destroy,
|
||||
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
|
||||
.address_watch_disable = kgd_address_watch_disable,
|
||||
.address_watch_execute = kgd_address_watch_execute,
|
||||
.wave_control_execute = kgd_wave_control_execute,
|
||||
.address_watch_get_offset = kgd_address_watch_get_offset,
|
||||
.get_atc_vmid_pasid_mapping_pasid =
|
||||
get_atc_vmid_pasid_mapping_pasid,
|
||||
.get_atc_vmid_pasid_mapping_valid =
|
||||
get_atc_vmid_pasid_mapping_valid,
|
||||
.set_scratch_backing_va = set_scratch_backing_va,
|
||||
.get_tile_config = amdgpu_amdkfd_get_tile_config,
|
||||
.set_vm_context_page_table_base = set_vm_context_page_table_base,
|
||||
.invalidate_tlbs = invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||
.get_hive_id = amdgpu_amdkfd_get_hive_id,
|
||||
};
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
|
||||
{
|
||||
return (struct kfd2kgd_calls *)&kfd2kgd;
|
||||
}
|
||||
|
||||
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
|
||||
{
|
||||
return (struct amdgpu_device *)kgd;
|
||||
|
@ -215,7 +128,7 @@ static void release_queue(struct kgd_dev *kgd)
|
|||
unlock_srbm(kgd);
|
||||
}
|
||||
|
||||
static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
|
||||
void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
|
||||
uint32_t sh_mem_config,
|
||||
uint32_t sh_mem_ape1_base,
|
||||
uint32_t sh_mem_ape1_limit,
|
||||
|
@ -232,7 +145,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
|
|||
unlock_srbm(kgd);
|
||||
}
|
||||
|
||||
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
|
||||
int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
|
||||
unsigned int vmid)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
|
@ -293,7 +206,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
|
|||
* but still works
|
||||
*/
|
||||
|
||||
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
|
||||
int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
uint32_t mec;
|
||||
|
@ -343,7 +256,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
|
|||
return (struct v9_sdma_mqd *)mqd;
|
||||
}
|
||||
|
||||
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
|
||||
int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
|
||||
uint32_t queue_id, uint32_t __user *wptr,
|
||||
uint32_t wptr_shift, uint32_t wptr_mask,
|
||||
struct mm_struct *mm)
|
||||
|
@ -438,7 +351,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int kgd_hqd_dump(struct kgd_dev *kgd,
|
||||
int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t (**dump)[2], uint32_t *n_regs)
|
||||
{
|
||||
|
@ -575,7 +488,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
|
||||
bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
|
||||
uint32_t pipe_id, uint32_t queue_id)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
|
@ -616,7 +529,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
|
|||
return false;
|
||||
}
|
||||
|
||||
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
|
||||
int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
|
||||
enum kfd_preempt_type reset_type,
|
||||
unsigned int utimeout, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
|
@ -704,7 +617,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
|
||||
bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
|
||||
uint8_t vmid)
|
||||
{
|
||||
uint32_t reg;
|
||||
|
@ -715,7 +628,7 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
|
|||
return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
|
||||
}
|
||||
|
||||
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
|
||||
uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
|
||||
uint8_t vmid)
|
||||
{
|
||||
uint32_t reg;
|
||||
|
@ -754,7 +667,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
||||
int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
int vmid;
|
||||
|
@ -773,8 +686,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
|||
for (vmid = 0; vmid < 16; vmid++) {
|
||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
|
||||
continue;
|
||||
if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
|
||||
if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
|
||||
if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
|
||||
if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
|
||||
== pasid) {
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, vmid,
|
||||
flush_type);
|
||||
|
@ -786,7 +699,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
|
||||
int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
|
||||
|
@ -814,12 +727,12 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int kgd_address_watch_disable(struct kgd_dev *kgd)
|
||||
int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kgd_address_watch_execute(struct kgd_dev *kgd,
|
||||
int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
|
||||
unsigned int watch_point_id,
|
||||
uint32_t cntl_val,
|
||||
uint32_t addr_hi,
|
||||
|
@ -828,7 +741,7 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int kgd_wave_control_execute(struct kgd_dev *kgd,
|
||||
int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
|
||||
uint32_t gfx_index_val,
|
||||
uint32_t sq_cmd)
|
||||
{
|
||||
|
@ -853,14 +766,14 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
|
||||
uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
|
||||
unsigned int watch_point_id,
|
||||
unsigned int reg_offset)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void set_scratch_backing_va(struct kgd_dev *kgd,
|
||||
void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd,
|
||||
uint64_t va, uint32_t vmid)
|
||||
{
|
||||
/* No longer needed on GFXv9. The scratch base address is
|
||||
|
@ -869,7 +782,7 @@ static void set_scratch_backing_va(struct kgd_dev *kgd,
|
|||
*/
|
||||
}
|
||||
|
||||
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
||||
void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
||||
uint64_t page_table_base)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
|
@ -884,7 +797,45 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
|||
* now, all processes share the same address space size, like
|
||||
* on GFX8 and older.
|
||||
*/
|
||||
mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
|
||||
if (adev->asic_type == CHIP_ARCTURUS) {
|
||||
/* Two MMHUBs */
|
||||
mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base);
|
||||
mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base);
|
||||
} else
|
||||
mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
|
||||
|
||||
gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
|
||||
}
|
||||
|
||||
static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
|
||||
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
|
||||
.init_interrupts = kgd_gfx_v9_init_interrupts,
|
||||
.hqd_load = kgd_gfx_v9_hqd_load,
|
||||
.hqd_sdma_load = kgd_hqd_sdma_load,
|
||||
.hqd_dump = kgd_gfx_v9_hqd_dump,
|
||||
.hqd_sdma_dump = kgd_hqd_sdma_dump,
|
||||
.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
|
||||
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
|
||||
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
|
||||
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
|
||||
.address_watch_disable = kgd_gfx_v9_address_watch_disable,
|
||||
.address_watch_execute = kgd_gfx_v9_address_watch_execute,
|
||||
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
|
||||
.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
|
||||
.get_atc_vmid_pasid_mapping_pasid =
|
||||
kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
|
||||
.get_atc_vmid_pasid_mapping_valid =
|
||||
kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
|
||||
.set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va,
|
||||
.get_tile_config = kgd_gfx_v9_get_tile_config,
|
||||
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
|
||||
.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
|
||||
.get_hive_id = amdgpu_amdkfd_get_hive_id,
|
||||
};
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
|
||||
{
|
||||
return (struct kfd2kgd_calls *)&kfd2kgd;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Copyright 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
|
||||
uint32_t sh_mem_config,
|
||||
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
|
||||
uint32_t sh_mem_bases);
|
||||
int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
|
||||
unsigned int vmid);
|
||||
int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
|
||||
int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
|
||||
uint32_t queue_id, uint32_t __user *wptr,
|
||||
uint32_t wptr_shift, uint32_t wptr_mask,
|
||||
struct mm_struct *mm);
|
||||
int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t (**dump)[2], uint32_t *n_regs);
|
||||
bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
|
||||
uint32_t pipe_id, uint32_t queue_id);
|
||||
int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
|
||||
enum kfd_preempt_type reset_type,
|
||||
unsigned int utimeout, uint32_t pipe_id,
|
||||
uint32_t queue_id);
|
||||
int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd);
|
||||
int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
|
||||
unsigned int watch_point_id,
|
||||
uint32_t cntl_val,
|
||||
uint32_t addr_hi,
|
||||
uint32_t addr_lo);
|
||||
int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
|
||||
uint32_t gfx_index_val,
|
||||
uint32_t sq_cmd);
|
||||
uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
|
||||
unsigned int watch_point_id,
|
||||
unsigned int reg_offset);
|
||||
|
||||
bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
|
||||
uint8_t vmid);
|
||||
uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
|
||||
uint8_t vmid);
|
||||
void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
||||
uint64_t page_table_base);
|
||||
void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd,
|
||||
uint64_t va, uint32_t vmid);
|
||||
int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
|
||||
int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
|
||||
int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
|
||||
struct tile_config *config);
|
|
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

@@ -1090,7 +1090,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
*/
if (flags & ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
AMDGPU_GEM_CREATE_NO_CPU_ACCESS;

|
@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
|
|||
struct amdgpu_ctx *ctx)
|
||||
{
|
||||
unsigned num_entities = amdgput_ctx_total_num_entities();
|
||||
unsigned i, j;
|
||||
unsigned i, j, k;
|
||||
int r;
|
||||
|
||||
if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
|
||||
|
@ -123,7 +123,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
|
|||
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
|
||||
struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
|
||||
struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
|
||||
unsigned num_rings;
|
||||
unsigned num_rings = 0;
|
||||
unsigned num_rqs = 0;
|
||||
|
||||
switch (i) {
|
||||
|
@ -154,16 +154,26 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
|
|||
num_rings = 1;
|
||||
break;
|
||||
case AMDGPU_HW_IP_VCN_DEC:
|
||||
rings[0] = &adev->vcn.ring_dec;
|
||||
num_rings = 1;
|
||||
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
|
||||
if (adev->vcn.harvest_config & (1 << j))
|
||||
continue;
|
||||
rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
|
||||
}
|
||||
break;
|
||||
case AMDGPU_HW_IP_VCN_ENC:
|
||||
rings[0] = &adev->vcn.ring_enc[0];
|
||||
num_rings = 1;
|
||||
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
|
||||
if (adev->vcn.harvest_config & (1 << j))
|
||||
continue;
|
||||
for (k = 0; k < adev->vcn.num_enc_rings; ++k)
|
||||
rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
|
||||
}
|
||||
break;
|
||||
case AMDGPU_HW_IP_VCN_JPEG:
|
||||
rings[0] = &adev->vcn.ring_jpeg;
|
||||
num_rings = 1;
|
||||
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
|
||||
if (adev->vcn.harvest_config & (1 << j))
|
||||
continue;
|
||||
rings[num_rings++] = &adev->vcn.inst[j].ring_jpeg;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -70,7 +70,10 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
|
|||
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
|
||||
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
|
||||
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
|
||||
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
|
||||
|
||||
#define AMDGPU_RESUME_MS 2000
|
||||
|
||||
|
@ -98,7 +101,10 @@ static const char *amdgpu_asic_name[] = {
|
|||
"VEGA12",
|
||||
"VEGA20",
|
||||
"RAVEN",
|
||||
"ARCTURUS",
|
||||
"NAVI10",
|
||||
"NAVI14",
|
||||
"NAVI12",
|
||||
"LAST",
|
||||
};
|
||||
|
||||
|
@ -412,6 +418,40 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32
|
|||
BUG();
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_invalid_rreg64 - dummy 64 bit reg read function
|
||||
*
|
||||
* @adev: amdgpu device pointer
|
||||
* @reg: offset of register
|
||||
*
|
||||
* Dummy register read function. Used for register blocks
|
||||
* that certain asics don't have (all asics).
|
||||
* Returns the value in the register.
|
||||
*/
|
||||
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
|
||||
{
|
||||
DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
|
||||
BUG();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_invalid_wreg64 - dummy reg write function
|
||||
*
|
||||
* @adev: amdgpu device pointer
|
||||
* @reg: offset of register
|
||||
* @v: value to write to the register
|
||||
*
|
||||
* Dummy register read function. Used for register blocks
|
||||
* that certain asics don't have (all asics).
|
||||
*/
|
||||
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
|
||||
{
|
||||
DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
|
||||
reg, v);
|
||||
BUG();
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_block_invalid_rreg - dummy reg read function
|
||||
*
|
||||
|
@ -1384,9 +1424,18 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
|
|||
else
|
||||
chip_name = "raven";
|
||||
break;
|
||||
case CHIP_ARCTURUS:
|
||||
chip_name = "arcturus";
|
||||
break;
|
||||
case CHIP_NAVI10:
|
||||
chip_name = "navi10";
|
||||
break;
|
||||
case CHIP_NAVI14:
|
||||
chip_name = "navi14";
|
||||
break;
|
||||
case CHIP_NAVI12:
|
||||
chip_name = "navi12";
|
||||
break;
|
||||
}
|
||||
|
||||
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
|
||||
|
@ -1529,6 +1578,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
|
|||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_RAVEN:
|
||||
case CHIP_ARCTURUS:
|
||||
if (adev->asic_type == CHIP_RAVEN)
|
||||
adev->family = AMDGPU_FAMILY_RV;
|
||||
else
|
||||
|
@ -1539,6 +1589,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
|
|||
return r;
|
||||
break;
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
adev->family = AMDGPU_FAMILY_NV;
|
||||
|
||||
r = nv_set_ip_blocks(adev);
|
||||
|
@ -1560,9 +1612,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
|
|||
r = amdgpu_virt_request_full_gpu(adev, true);
|
||||
if (r)
|
||||
return -EAGAIN;
|
||||
|
||||
/* query the reg access mode at the very beginning */
|
||||
amdgpu_virt_init_reg_access_mode(adev);
|
||||
}
|
||||
|
||||
adev->pm.pp_feature = amdgpu_pp_feature_mask;
|
||||
|
@ -1665,28 +1714,34 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
|
|||
|
||||
if (adev->asic_type >= CHIP_VEGA10) {
|
||||
for (i = 0; i < adev->num_ip_blocks; i++) {
|
||||
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
|
||||
if (adev->in_gpu_reset || adev->in_suspend) {
|
||||
if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
|
||||
break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
|
||||
r = adev->ip_blocks[i].version->funcs->resume(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("resume of IP block <%s> failed %d\n",
|
||||
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
|
||||
continue;
|
||||
|
||||
/* no need to do the fw loading again if already done*/
|
||||
if (adev->ip_blocks[i].status.hw == true)
|
||||
break;
|
||||
|
||||
if (adev->in_gpu_reset || adev->in_suspend) {
|
||||
r = adev->ip_blocks[i].version->funcs->resume(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("resume of IP block <%s> failed %d\n",
|
||||
adev->ip_blocks[i].version->funcs->name, r);
|
||||
return r;
|
||||
}
|
||||
} else {
|
||||
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("hw_init of IP block <%s> failed %d\n",
|
||||
adev->ip_blocks[i].version->funcs->name, r);
|
||||
return r;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
} else {
|
||||
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("hw_init of IP block <%s> failed %d\n",
|
||||
adev->ip_blocks[i].version->funcs->name, r);
|
||||
return r;
|
||||
}
|
||||
adev->ip_blocks[i].status.hw = true;
|
||||
}
|
||||
|
||||
adev->ip_blocks[i].status.hw = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
|
||||
|
||||
return r;
|
||||
|
@ -2128,7 +2183,9 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
|
|||
if (r) {
|
||||
DRM_ERROR("suspend of IP block <%s> failed %d\n",
|
||||
adev->ip_blocks[i].version->funcs->name, r);
|
||||
return r;
|
||||
}
|
||||
adev->ip_blocks[i].status.hw = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2163,6 +2220,25 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
|
|||
DRM_ERROR("suspend of IP block <%s> failed %d\n",
|
||||
adev->ip_blocks[i].version->funcs->name, r);
|
||||
}
|
||||
adev->ip_blocks[i].status.hw = false;
|
||||
/* handle putting the SMC in the appropriate state */
|
||||
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
|
||||
if (is_support_sw_smu(adev)) {
|
||||
/* todo */
|
||||
} else if (adev->powerplay.pp_funcs &&
|
||||
adev->powerplay.pp_funcs->set_mp1_state) {
|
||||
r = adev->powerplay.pp_funcs->set_mp1_state(
|
||||
adev->powerplay.pp_handle,
|
||||
adev->mp1_state);
|
||||
if (r) {
|
||||
DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
|
||||
adev->mp1_state, r);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
adev->ip_blocks[i].status.hw = false;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -2215,6 +2291,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
|
|||
for (j = 0; j < adev->num_ip_blocks; j++) {
|
||||
block = &adev->ip_blocks[j];
|
||||
|
||||
block->status.hw = false;
|
||||
if (block->version->type != ip_order[i] ||
|
||||
!block->status.valid)
|
||||
continue;
|
||||
|
@ -2223,6 +2300,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
|
|||
DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
|
||||
if (r)
|
||||
return r;
|
||||
block->status.hw = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2250,13 +2328,15 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
|
|||
block = &adev->ip_blocks[j];
|
||||
|
||||
if (block->version->type != ip_order[i] ||
|
||||
!block->status.valid)
|
||||
!block->status.valid ||
|
||||
block->status.hw)
|
||||
continue;
|
||||
|
||||
r = block->version->funcs->hw_init(adev);
|
||||
DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
|
||||
if (r)
|
||||
return r;
|
||||
block->status.hw = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2280,17 +2360,19 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
|
|||
int i, r;
|
||||
|
||||
for (i = 0; i < adev->num_ip_blocks; i++) {
|
||||
if (!adev->ip_blocks[i].status.valid)
|
||||
if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
|
||||
continue;
|
||||
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
|
||||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
|
||||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
|
||||
|
||||
r = adev->ip_blocks[i].version->funcs->resume(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("resume of IP block <%s> failed %d\n",
|
||||
adev->ip_blocks[i].version->funcs->name, r);
|
||||
return r;
|
||||
}
|
||||
adev->ip_blocks[i].status.hw = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2315,7 +2397,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
|
|||
int i, r;
|
||||
|
||||
for (i = 0; i < adev->num_ip_blocks; i++) {
|
||||
if (!adev->ip_blocks[i].status.valid)
|
||||
if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
|
||||
continue;
|
||||
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
|
||||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
|
||||
|
@ -2328,6 +2410,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
|
|||
adev->ip_blocks[i].version->funcs->name, r);
|
||||
return r;
|
||||
}
|
||||
adev->ip_blocks[i].status.hw = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -2426,6 +2509,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
|
|||
#endif
|
||||
#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
#endif
|
||||
return amdgpu_dc != 0;
|
||||
#endif
|
||||
|
@ -2509,6 +2594,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
adev->pcie_wreg = &amdgpu_invalid_wreg;
|
||||
adev->pciep_rreg = &amdgpu_invalid_rreg;
|
||||
adev->pciep_wreg = &amdgpu_invalid_wreg;
|
||||
adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
|
||||
adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
|
||||
adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
|
||||
adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
|
||||
adev->didt_rreg = &amdgpu_invalid_rreg;
|
||||
|
@ -3627,6 +3714,17 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
|
|||
|
||||
atomic_inc(&adev->gpu_reset_counter);
|
||||
adev->in_gpu_reset = 1;
|
||||
switch (amdgpu_asic_reset_method(adev)) {
|
||||
case AMD_RESET_METHOD_MODE1:
|
||||
adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
|
||||
break;
|
||||
case AMD_RESET_METHOD_MODE2:
|
||||
adev->mp1_state = PP_MP1_STATE_RESET;
|
||||
break;
|
||||
default:
|
||||
adev->mp1_state = PP_MP1_STATE_NONE;
|
||||
break;
|
||||
}
|
||||
/* Block kfd: SRIOV would do it separately */
|
||||
if (!amdgpu_sriov_vf(adev))
|
||||
amdgpu_amdkfd_pre_reset(adev);
|
||||
|
@ -3640,6 +3738,7 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
|
|||
if (!amdgpu_sriov_vf(adev))
|
||||
amdgpu_amdkfd_post_reset(adev);
|
||||
amdgpu_vf_error_trans_all(adev);
|
||||
adev->mp1_state = PP_MP1_STATE_NONE;
|
||||
adev->in_gpu_reset = 0;
|
||||
mutex_unlock(&adev->lock_reset);
|
||||
}
|
||||
|
|
|
@ -191,7 +191,8 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
|
|||
}
|
||||
|
||||
if (!adev->enable_virtual_display) {
|
||||
r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev));
|
||||
r = amdgpu_bo_pin(new_abo,
|
||||
amdgpu_display_supported_domains(adev, new_abo->flags));
|
||||
if (unlikely(r != 0)) {
|
||||
DRM_ERROR("failed to pin new abo buffer before flip\n");
|
||||
goto unreserve;
|
||||
|
@ -495,13 +496,25 @@ static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
|
|||
.create_handle = drm_gem_fb_create_handle,
|
||||
};
|
||||
|
||||
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev)
|
||||
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
|
||||
uint64_t bo_flags)
|
||||
{
|
||||
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
|
||||
|
||||
#if defined(CONFIG_DRM_AMD_DC)
|
||||
if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN &&
|
||||
adev->flags & AMD_IS_APU &&
|
||||
/*
|
||||
* if amdgpu_bo_support_uswc returns false it means that USWC mappings
|
||||
* is not supported for this board. But this mapping is required
|
||||
* to avoid hang caused by placement of scanout BO in GTT on certain
|
||||
* APUs. So force the BO placement to VRAM in case this architecture
|
||||
* will not allow USWC mappings.
|
||||
* Also, don't allow GTT domain if the BO doens't have USWC falg set.
|
||||
*/
|
||||
if (adev->asic_type >= CHIP_CARRIZO &&
|
||||
adev->asic_type <= CHIP_RAVEN &&
|
||||
(adev->flags & AMD_IS_APU) &&
|
||||
(bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
|
||||
amdgpu_bo_support_uswc(bo_flags) &&
|
||||
amdgpu_device_asic_has_dc_support(adev->asic_type))
|
||||
domain |= AMDGPU_GEM_DOMAIN_GTT;
|
||||
#endif
|
||||
|
|
|
@ -38,7 +38,8 @@
|
|||
int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *filp);
|
||||
void amdgpu_display_update_priority(struct amdgpu_device *adev);
|
||||
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
|
||||
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
|
||||
uint64_t bo_flags);
|
||||
struct drm_framebuffer *
|
||||
amdgpu_display_user_framebuffer_create(struct drm_device *dev,
|
||||
struct drm_file *file_priv,
|
||||
|
|
|
@ -285,7 +285,7 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
|
|||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||
struct ttm_operation_ctx ctx = { true, false };
|
||||
u32 domain = amdgpu_display_supported_domains(adev);
|
||||
u32 domain = amdgpu_display_supported_domains(adev, bo->flags);
|
||||
int ret;
|
||||
bool reads = (direction == DMA_BIDIRECTIONAL ||
|
||||
direction == DMA_FROM_DEVICE);
|
||||
|
|
|
@ -130,13 +130,18 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
|
|||
AMDGPU_VEGA20_DOORBELL_IH = 0x178,
|
||||
/* MMSCH: 392~407
|
||||
* overlap the doorbell assignment with VCN as they are mutually exclusive
|
||||
* VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
|
||||
* VCN engine's doorbell is 32 bit and two VCN rings share one QWORD
|
||||
*/
|
||||
AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
|
||||
AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* VCN0 */
|
||||
AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189,
|
||||
AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A,
|
||||
AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B,
|
||||
|
||||
AMDGPU_VEGA20_DOORBELL64_VCN8_9 = 0x18C, /* VCN1 */
|
||||
AMDGPU_VEGA20_DOORBELL64_VCNa_b = 0x18D,
|
||||
AMDGPU_VEGA20_DOORBELL64_VCNc_d = 0x18E,
|
||||
AMDGPU_VEGA20_DOORBELL64_VCNe_f = 0x18F,
|
||||
|
||||
AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188,
|
||||
AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189,
|
||||
AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A,
|
||||
|
|
|
@ -996,6 +996,10 @@ static const struct pci_device_id pciidlist[] = {
|
|||
/* Raven */
|
||||
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
|
||||
{0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
|
||||
/* Arcturus */
|
||||
{0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
|
||||
{0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
|
||||
{0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
|
||||
/* Navi10 */
|
||||
{0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
|
||||
{0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
|
||||
|
@ -1092,21 +1096,21 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
|
|||
* unfortunately we can't detect certain
|
||||
* hypervisors so just do this all the time.
|
||||
*/
|
||||
adev->mp1_state = PP_MP1_STATE_UNLOAD;
|
||||
amdgpu_device_ip_suspend(adev);
|
||||
adev->mp1_state = PP_MP1_STATE_NONE;
|
||||
}
|
||||
|
||||
static int amdgpu_pmops_suspend(struct device *dev)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct drm_device *drm_dev = dev_get_drvdata(dev);
|
||||
|
||||
struct drm_device *drm_dev = pci_get_drvdata(pdev);
|
||||
return amdgpu_device_suspend(drm_dev, true, true);
|
||||
}
|
||||
|
||||
static int amdgpu_pmops_resume(struct device *dev)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct drm_device *drm_dev = pci_get_drvdata(pdev);
|
||||
struct drm_device *drm_dev = dev_get_drvdata(dev);
|
||||
|
||||
/* GPU comes up enabled by the bios on resume */
|
||||
if (amdgpu_device_is_px(drm_dev)) {
|
||||
|
@ -1120,33 +1124,29 @@ static int amdgpu_pmops_resume(struct device *dev)
|
|||
|
||||
static int amdgpu_pmops_freeze(struct device *dev)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct drm_device *drm_dev = dev_get_drvdata(dev);
|
||||
|
||||
struct drm_device *drm_dev = pci_get_drvdata(pdev);
|
||||
return amdgpu_device_suspend(drm_dev, false, true);
|
||||
}
|
||||
|
||||
static int amdgpu_pmops_thaw(struct device *dev)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct drm_device *drm_dev = dev_get_drvdata(dev);
|
||||
|
||||
struct drm_device *drm_dev = pci_get_drvdata(pdev);
|
||||
return amdgpu_device_resume(drm_dev, false, true);
|
||||
}
|
||||
|
||||
static int amdgpu_pmops_poweroff(struct device *dev)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct drm_device *drm_dev = dev_get_drvdata(dev);
|
||||
|
||||
struct drm_device *drm_dev = pci_get_drvdata(pdev);
|
||||
return amdgpu_device_suspend(drm_dev, true, true);
|
||||
}
|
||||
|
||||
static int amdgpu_pmops_restore(struct device *dev)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct drm_device *drm_dev = dev_get_drvdata(dev);
|
||||
|
||||
struct drm_device *drm_dev = pci_get_drvdata(pdev);
|
||||
return amdgpu_device_resume(drm_dev, false, true);
|
||||
}
|
||||
|
||||
|
@ -1205,8 +1205,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
|
|||
|
||||
static int amdgpu_pmops_runtime_idle(struct device *dev)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct drm_device *drm_dev = pci_get_drvdata(pdev);
|
||||
struct drm_device *drm_dev = dev_get_drvdata(dev);
|
||||
struct drm_crtc *crtc;
|
||||
|
||||
if (!amdgpu_device_is_px(drm_dev)) {
|
||||
|
|
|
@ -131,6 +131,10 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
|
|||
int aligned_size, size;
|
||||
int height = mode_cmd->height;
|
||||
u32 cpp;
|
||||
u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
|
||||
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
|
||||
AMDGPU_GEM_CREATE_VRAM_CLEARED |
|
||||
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
|
||||
|
||||
info = drm_get_format_info(adev->ddev, mode_cmd);
|
||||
cpp = info->cpp[0];
|
||||
|
@ -138,15 +142,11 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
|
|||
/* need to align pitch with crtc limits */
|
||||
mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
|
||||
fb_tiled);
|
||||
domain = amdgpu_display_supported_domains(adev);
|
||||
|
||||
domain = amdgpu_display_supported_domains(adev, flags);
|
||||
height = ALIGN(mode_cmd->height, 8);
|
||||
size = mode_cmd->pitches[0] * height;
|
||||
aligned_size = ALIGN(size, PAGE_SIZE);
|
||||
ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain,
|
||||
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
|
||||
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
|
||||
AMDGPU_GEM_CREATE_VRAM_CLEARED,
|
||||
ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags,
|
||||
ttm_bo_type_kernel, NULL, &gobj);
|
||||
if (ret) {
|
||||
pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
|
||||
|
@ -168,7 +168,6 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
|
|||
dev_err(adev->dev, "FB failed to set tiling flags\n");
|
||||
}
|
||||
|
||||
|
||||
ret = amdgpu_bo_pin(abo, domain);
|
||||
if (ret) {
|
||||
amdgpu_bo_unreserve(abo);
|
||||
|
|
|
@ -747,7 +747,8 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
|
|||
struct amdgpu_device *adev = dev->dev_private;
|
||||
struct drm_gem_object *gobj;
|
||||
uint32_t handle;
|
||||
u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
|
||||
u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
|
||||
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
|
||||
u32 domain;
|
||||
int r;
|
||||
|
||||
|
@ -764,7 +765,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
|
|||
args->size = (u64)args->pitch * args->height;
|
||||
args->size = ALIGN(args->size, PAGE_SIZE);
|
||||
domain = amdgpu_bo_get_preferred_pin_domain(adev,
|
||||
amdgpu_display_supported_domains(adev));
|
||||
amdgpu_display_supported_domains(adev, flags));
|
||||
r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags,
|
||||
ttm_bo_type_device, NULL, &gobj);
|
||||
if (r)
|
||||
|
|
|
@ -389,7 +389,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
|
|||
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
|
||||
}
|
||||
|
||||
if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) {
|
||||
if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
|
||||
/* create MQD for each KGQ */
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
|
||||
ring = &adev->gfx.gfx_ring[i];
|
||||
|
@ -437,7 +437,7 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
|
|||
struct amdgpu_ring *ring = NULL;
|
||||
int i;
|
||||
|
||||
if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) {
|
||||
if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
|
||||
ring = &adev->gfx.gfx_ring[i];
|
||||
kfree(adev->gfx.me.mqd_backup[i]);
|
||||
|
@ -456,7 +456,7 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
|
|||
}
|
||||
|
||||
ring = &adev->gfx.kiq.ring;
|
||||
if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring)
|
||||
if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring)
|
||||
kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]);
|
||||
kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
|
||||
amdgpu_bo_free_kernel(&ring->mqd_obj,
|
||||
|
|
|
@ -196,6 +196,8 @@ struct amdgpu_gfx_funcs {
|
|||
uint32_t *dst);
|
||||
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
|
||||
u32 queue, u32 vmid);
|
||||
int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
|
||||
int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status);
|
||||
};
|
||||
|
||||
struct amdgpu_ngg_buf {
|
||||
|
|
|
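These two hooks are what the RAS core dispatches to for the GFX block. A hedged sketch of how a gfx IP file might wire them up; the example_ handlers are placeholders for the real gfx_v9_0 implementations and the pre-existing hooks are omitted:

static int example_gfx_ras_error_inject(struct amdgpu_device *adev,
					void *inject_if)
{
	/* program the EDC injection registers described by inject_if */
	return 0;
}

static int example_gfx_query_ras_error_count(struct amdgpu_device *adev,
					     void *ras_error_status)
{
	/* fill the ras_err_data behind ras_error_status */
	return 0;
}

static const struct amdgpu_gfx_funcs example_gfx_funcs = {
	.ras_error_inject = example_gfx_ras_error_inject,
	.query_ras_error_count = example_gfx_query_ras_error_count,
};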
@ -368,7 +368,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
|
|||
* are broken on Navi10 and Navi14.
|
||||
*/
|
||||
if (needs_flush && (adev->asic_type < CHIP_VEGA10 ||
|
||||
adev->asic_type == CHIP_NAVI10))
|
||||
adev->asic_type == CHIP_NAVI10 ||
|
||||
adev->asic_type == CHIP_NAVI14))
|
||||
continue;
|
||||
|
||||
/* Good, we can use this VMID. Remember this submission as
|
||||
|
|
|
@ -408,23 +408,38 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
|||
break;
|
||||
case AMDGPU_HW_IP_VCN_DEC:
|
||||
type = AMD_IP_BLOCK_TYPE_VCN;
|
||||
if (adev->vcn.ring_dec.sched.ready)
|
||||
++num_rings;
|
||||
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
|
||||
if (adev->uvd.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
if (adev->vcn.inst[i].ring_dec.sched.ready)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 16;
|
||||
ib_size_alignment = 16;
|
||||
break;
|
||||
case AMDGPU_HW_IP_VCN_ENC:
|
||||
type = AMD_IP_BLOCK_TYPE_VCN;
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; i++)
|
||||
if (adev->vcn.ring_enc[i].sched.ready)
|
||||
++num_rings;
|
||||
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
|
||||
if (adev->uvd.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
for (j = 0; j < adev->vcn.num_enc_rings; j++)
|
||||
if (adev->vcn.inst[i].ring_enc[j].sched.ready)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 64;
|
||||
ib_size_alignment = 1;
|
||||
break;
|
||||
case AMDGPU_HW_IP_VCN_JPEG:
|
||||
type = AMD_IP_BLOCK_TYPE_VCN;
|
||||
if (adev->vcn.ring_jpeg.sched.ready)
|
||||
++num_rings;
|
||||
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
|
||||
if (adev->uvd.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
if (adev->vcn.inst[i].ring_jpeg.sched.ready)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 16;
|
||||
ib_size_alignment = 16;
|
||||
break;
|
||||
|
|
|
@ -80,9 +80,6 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
|
|||
if (bo->pin_count > 0)
|
||||
amdgpu_bo_subtract_pin_size(bo);
|
||||
|
||||
if (bo->kfd_bo)
|
||||
amdgpu_amdkfd_unreserve_memory_limit(bo);
|
||||
|
||||
amdgpu_bo_kunmap(bo);
|
||||
|
||||
if (bo->tbo.base.import_attach)
|
||||
|
@ -413,6 +410,40 @@ fail:
|
|||
return false;
|
||||
}
|
||||
|
||||
bool amdgpu_bo_support_uswc(u64 bo_flags)
|
||||
{
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
|
||||
* See https://bugs.freedesktop.org/show_bug.cgi?id=84627
|
||||
*/
|
||||
return false;
|
||||
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
|
||||
/* Don't try to enable write-combining when it can't work, or things
|
||||
* may be slow
|
||||
* See https://bugs.freedesktop.org/show_bug.cgi?id=88758
|
||||
*/
|
||||
|
||||
#ifndef CONFIG_COMPILE_TEST
|
||||
#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
|
||||
thanks to write-combining
|
||||
#endif
|
||||
|
||||
if (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
|
||||
DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
|
||||
"better performance thanks to write-combining\n");
|
||||
return false;
|
||||
#else
|
||||
/* For architectures that don't support WC memory,
|
||||
* mask out the WC flag from the BO
|
||||
*/
|
||||
if (!drm_arch_can_wc_memory())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
static int amdgpu_bo_do_create(struct amdgpu_device *adev,
|
||||
struct amdgpu_bo_param *bp,
|
||||
struct amdgpu_bo **bo_ptr)
|
||||
|
@ -466,33 +497,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
|
|||
|
||||
bo->flags = bp->flags;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
|
||||
* See https://bugs.freedesktop.org/show_bug.cgi?id=84627
|
||||
*/
|
||||
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
|
||||
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
|
||||
/* Don't try to enable write-combining when it can't work, or things
|
||||
* may be slow
|
||||
* See https://bugs.freedesktop.org/show_bug.cgi?id=88758
|
||||
*/
|
||||
|
||||
#ifndef CONFIG_COMPILE_TEST
|
||||
#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
|
||||
thanks to write-combining
|
||||
#endif
|
||||
|
||||
if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
|
||||
DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
|
||||
"better performance thanks to write-combining\n");
|
||||
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
|
||||
#else
|
||||
/* For architectures that don't support WC memory,
|
||||
* mask out the WC flag from the BO
|
||||
*/
|
||||
if (!drm_arch_can_wc_memory())
|
||||
if (!amdgpu_bo_support_uswc(bo->flags))
|
||||
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
|
||||
#endif
|
||||
|
||||
bo->tbo.bdev = &adev->mman.bdev;
|
||||
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
|
||||
|
@ -1211,6 +1217,42 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
|
|||
trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_bo_release_notify - notification about a BO being released
|
||||
* @bo: pointer to a buffer object
|
||||
*
|
||||
* Wipes VRAM buffers whose contents should not be leaked before the
|
||||
* memory is released.
|
||||
*/
|
||||
void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
|
||||
{
|
||||
struct dma_fence *fence = NULL;
|
||||
struct amdgpu_bo *abo;
|
||||
int r;
|
||||
|
||||
if (!amdgpu_bo_is_amdgpu_bo(bo))
|
||||
return;
|
||||
|
||||
abo = ttm_to_amdgpu_bo(bo);
|
||||
|
||||
if (abo->kfd_bo)
|
||||
amdgpu_amdkfd_unreserve_memory_limit(abo);
|
||||
|
||||
if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
|
||||
!(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
|
||||
return;
|
||||
|
||||
reservation_object_lock(bo->base.resv, NULL);
|
||||
|
||||
r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
|
||||
if (!WARN_ON(r)) {
|
||||
amdgpu_bo_fence(abo, fence, false);
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
|
||||
reservation_object_unlock(bo->base.resv);
|
||||
}
|
||||
|
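A hedged sketch of how a buffer opts in to this wipe: request AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE at creation time and the release path above fills the VRAM with AMDGPU_POISON before the space can be handed out again (size is assumed, the bo_param fields are the ones already used elsewhere in this file):

	struct amdgpu_bo_param bp;
	struct amdgpu_bo *bo;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
	bp.type = ttm_bo_type_device;
	bp.resv = NULL;

	r = amdgpu_bo_create(adev, &bp, &bo);
	/* once the last reference is dropped, release_notify wipes the VRAM */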
||||
/**
|
||||
* amdgpu_bo_fault_reserve_notify - notification about a memory fault
|
||||
* @bo: pointer to a buffer object
|
||||
|
|
|
@ -264,6 +264,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
|
|||
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
|
||||
bool evict,
|
||||
struct ttm_mem_reg *new_mem);
|
||||
void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
|
||||
int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
|
||||
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
|
||||
bool shared);
|
||||
|
@ -307,5 +308,7 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
|
|||
struct seq_file *m);
|
||||
#endif
|
||||
|
||||
bool amdgpu_bo_support_uswc(u64 bo_flags);
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -325,13 +325,6 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
|
|||
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
|
||||
return -EINVAL;
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
if (is_support_sw_smu(adev))
|
||||
current_level = smu_get_performance_level(&adev->smu);
|
||||
else if (adev->powerplay.pp_funcs->get_performance_level)
|
||||
current_level = amdgpu_dpm_get_performance_level(adev);
|
||||
}
|
||||
|
||||
if (strncmp("low", buf, strlen("low")) == 0) {
|
||||
level = AMD_DPM_FORCED_LEVEL_LOW;
|
||||
} else if (strncmp("high", buf, strlen("high")) == 0) {
|
||||
|
@ -355,17 +348,23 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
|
|||
goto fail;
|
||||
}
|
||||
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
if (amdgim_is_hwperf(adev) &&
|
||||
adev->virt.ops->force_dpm_level) {
|
||||
mutex_lock(&adev->pm.mutex);
|
||||
adev->virt.ops->force_dpm_level(adev, level);
|
||||
mutex_unlock(&adev->pm.mutex);
|
||||
return count;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
/* handle sriov case here */
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
if (amdgim_is_hwperf(adev) &&
|
||||
adev->virt.ops->force_dpm_level) {
|
||||
mutex_lock(&adev->pm.mutex);
|
||||
adev->virt.ops->force_dpm_level(adev, level);
|
||||
mutex_unlock(&adev->pm.mutex);
|
||||
return count;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (is_support_sw_smu(adev))
|
||||
current_level = smu_get_performance_level(&adev->smu);
|
||||
else if (adev->powerplay.pp_funcs->get_performance_level)
|
||||
current_level = amdgpu_dpm_get_performance_level(adev);
|
||||
|
||||
if (current_level == level)
|
||||
return count;
|
||||
|
@ -746,10 +745,10 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
|
|||
}
|
||||
|
||||
/**
|
||||
* DOC: ppfeatures
|
||||
* DOC: pp_features
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for adjusting what powerplay
|
||||
* features to be enabled. The file ppfeatures is used for this. And
|
||||
* features to be enabled. The file pp_features is used for this. And
|
||||
* this is only available for Vega10 and later dGPUs.
|
||||
*
|
||||
* Reading back the file will show you the following:
|
||||
|
@ -761,7 +760,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
|
|||
* the corresponding bit from original ppfeature masks and input the
|
||||
* new ppfeature masks.
|
||||
*/
|
||||
static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
|
||||
static ssize_t amdgpu_set_pp_feature_status(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
|
@ -778,7 +777,7 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
|
|||
pr_debug("featuremask = 0x%llx\n", featuremask);
|
||||
|
||||
if (is_support_sw_smu(adev)) {
|
||||
ret = smu_set_ppfeature_status(&adev->smu, featuremask);
|
||||
ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
} else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
|
||||
|
@ -790,7 +789,7 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
|
|||
return count;
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
|
||||
static ssize_t amdgpu_get_pp_feature_status(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
|
@ -798,7 +797,7 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
|
|||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
|
||||
if (is_support_sw_smu(adev)) {
|
||||
return smu_get_ppfeature_status(&adev->smu, buf);
|
||||
return smu_sys_get_pp_feature_mask(&adev->smu, buf);
|
||||
} else if (adev->powerplay.pp_funcs->get_ppfeature_status)
|
||||
return amdgpu_dpm_get_ppfeature_status(adev, buf);
|
||||
|
||||
|
@ -1458,9 +1457,9 @@ static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
|
|||
static DEVICE_ATTR(mem_busy_percent, S_IRUGO,
|
||||
amdgpu_get_memory_busy_percent, NULL);
|
||||
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
|
||||
static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
|
||||
amdgpu_get_ppfeature_status,
|
||||
amdgpu_set_ppfeature_status);
|
||||
static DEVICE_ATTR(pp_features, S_IRUGO | S_IWUSR,
|
||||
amdgpu_get_pp_feature_status,
|
||||
amdgpu_set_pp_feature_status);
|
||||
static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);
|
||||
|
||||
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
|
||||
|
@ -1625,20 +1624,16 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
|
|||
(adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
|
||||
return -EINVAL;
|
||||
|
||||
if (is_support_sw_smu(adev)) {
|
||||
err = kstrtoint(buf, 10, &value);
|
||||
if (err)
|
||||
return err;
|
||||
err = kstrtoint(buf, 10, &value);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (is_support_sw_smu(adev)) {
|
||||
smu_set_fan_control_mode(&adev->smu, value);
|
||||
} else {
|
||||
if (!adev->powerplay.pp_funcs->set_fan_control_mode)
|
||||
return -EINVAL;
|
||||
|
||||
err = kstrtoint(buf, 10, &value);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
amdgpu_dpm_set_fan_control_mode(adev, value);
|
||||
}
|
||||
|
||||
|
@ -2058,16 +2053,18 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
|
|||
return err;
|
||||
|
||||
value = value / 1000000; /* convert to Watt */
|
||||
|
||||
if (is_support_sw_smu(adev)) {
|
||||
adev->smu.funcs->set_power_limit(&adev->smu, value);
|
||||
err = smu_set_power_limit(&adev->smu, value);
|
||||
} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) {
|
||||
err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value);
|
||||
if (err)
|
||||
return err;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
err = -EINVAL;
|
||||
}
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
@ -2917,10 +2914,10 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
|
|||
if ((adev->asic_type >= CHIP_VEGA10) &&
|
||||
!(adev->flags & AMD_IS_APU)) {
|
||||
ret = device_create_file(adev->dev,
|
||||
&dev_attr_ppfeatures);
|
||||
&dev_attr_pp_features);
|
||||
if (ret) {
|
||||
DRM_ERROR("failed to create device file "
|
||||
"ppfeatures\n");
|
||||
"pp_features\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
@ -2974,7 +2971,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
|
|||
device_remove_file(adev->dev, &dev_attr_unique_id);
|
||||
if ((adev->asic_type >= CHIP_VEGA10) &&
|
||||
!(adev->flags & AMD_IS_APU))
|
||||
device_remove_file(adev->dev, &dev_attr_ppfeatures);
|
||||
device_remove_file(adev->dev, &dev_attr_pp_features);
|
||||
}
|
||||
|
||||
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
|
||||
|
|
|
@ -53,10 +53,13 @@ static int psp_early_init(void *handle)
|
|||
psp->autoload_supported = false;
|
||||
break;
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_ARCTURUS:
|
||||
psp_v11_0_set_psp_funcs(psp);
|
||||
psp->autoload_supported = false;
|
||||
break;
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
psp_v11_0_set_psp_funcs(psp);
|
||||
psp->autoload_supported = true;
|
||||
break;
|
||||
|
@ -162,8 +165,8 @@ psp_cmd_submit_buf(struct psp_context *psp,
|
|||
if (ucode)
|
||||
DRM_WARN("failed to load ucode id (%d) ",
|
||||
ucode->ucode_id);
|
||||
DRM_WARN("psp command failed and response status is (%d)\n",
|
||||
psp->cmd_buf_mem->resp.status);
|
||||
DRM_WARN("psp command failed and response status is (0x%X)\n",
|
||||
psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK);
|
||||
if (!timeout) {
|
||||
mutex_unlock(&psp->mutex);
|
||||
return -EINVAL;
|
||||
|
@ -831,7 +834,6 @@ static int psp_hw_start(struct psp_context *psp)
|
|||
"XGMI: Failed to initialize XGMI session\n");
|
||||
}
|
||||
|
||||
|
||||
if (psp->adev->psp.ta_fw) {
|
||||
ret = psp_ras_initialize(psp);
|
||||
if (ret)
|
||||
|
@ -852,6 +854,24 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
|
|||
case AMDGPU_UCODE_ID_SDMA1:
|
||||
*type = GFX_FW_TYPE_SDMA1;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SDMA2:
|
||||
*type = GFX_FW_TYPE_SDMA2;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SDMA3:
|
||||
*type = GFX_FW_TYPE_SDMA3;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SDMA4:
|
||||
*type = GFX_FW_TYPE_SDMA4;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SDMA5:
|
||||
*type = GFX_FW_TYPE_SDMA5;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SDMA6:
|
||||
*type = GFX_FW_TYPE_SDMA6;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_SDMA7:
|
||||
*type = GFX_FW_TYPE_SDMA7;
|
||||
break;
|
||||
case AMDGPU_UCODE_ID_CP_CE:
|
||||
*type = GFX_FW_TYPE_CP_CE;
|
||||
break;
|
||||
|
@ -980,12 +1000,20 @@ out:
|
|||
if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
|
||||
(psp_smu_reload_quirk(psp) || psp->autoload_supported))
|
||||
continue;
|
||||
|
||||
if (amdgpu_sriov_vf(adev) &&
|
||||
(ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0
|
||||
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1
|
||||
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2
|
||||
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3
|
||||
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4
|
||||
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5
|
||||
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6
|
||||
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7
|
||||
|| ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G))
|
||||
/* skip ucode loading in SRIOV VF */
|
||||
continue;
|
||||
|
||||
if (psp->autoload_supported &&
|
||||
(ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT ||
|
||||
ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT))
|
||||
|
@ -997,7 +1025,8 @@ out:
|
|||
return ret;
|
||||
|
||||
/* Start rlc autoload after psp received all the gfx firmware */
|
||||
if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
|
||||
if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM ||
|
||||
(adev->asic_type == CHIP_NAVI12 && ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) {
|
||||
ret = psp_rlc_autoload(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to start rlc autoload\n");
|
||||
|
|
|
@ -30,74 +30,6 @@
|
|||
#include "amdgpu_ras.h"
|
||||
#include "amdgpu_atomfirmware.h"
|
||||
|
||||
struct ras_ih_data {
|
||||
/* interrupt bottom half */
|
||||
struct work_struct ih_work;
|
||||
int inuse;
|
||||
/* IP callback */
|
||||
ras_ih_cb cb;
|
||||
/* full of entries */
|
||||
unsigned char *ring;
|
||||
unsigned int ring_size;
|
||||
unsigned int element_size;
|
||||
unsigned int aligned_element_size;
|
||||
unsigned int rptr;
|
||||
unsigned int wptr;
|
||||
};
|
||||
|
||||
struct ras_fs_data {
|
||||
char sysfs_name[32];
|
||||
char debugfs_name[32];
|
||||
};
|
||||
|
||||
struct ras_err_data {
|
||||
unsigned long ue_count;
|
||||
unsigned long ce_count;
|
||||
};
|
||||
|
||||
struct ras_err_handler_data {
|
||||
/* point to bad pages array */
|
||||
struct {
|
||||
unsigned long bp;
|
||||
struct amdgpu_bo *bo;
|
||||
} *bps;
|
||||
/* the count of entries */
|
||||
int count;
|
||||
/* the space can place new entries */
|
||||
int space_left;
|
||||
/* last reserved entry's index + 1 */
|
||||
int last_reserved;
|
||||
};
|
||||
|
||||
struct ras_manager {
|
||||
struct ras_common_if head;
|
||||
/* reference count */
|
||||
int use;
|
||||
/* ras block link */
|
||||
struct list_head node;
|
||||
/* the device */
|
||||
struct amdgpu_device *adev;
|
||||
/* debugfs */
|
||||
struct dentry *ent;
|
||||
/* sysfs */
|
||||
struct device_attribute sysfs_attr;
|
||||
int attr_inuse;
|
||||
|
||||
/* fs node name */
|
||||
struct ras_fs_data fs_data;
|
||||
|
||||
/* IH data */
|
||||
struct ras_ih_data ih_data;
|
||||
|
||||
struct ras_err_data err_data;
|
||||
};
|
||||
|
||||
struct ras_badpage {
|
||||
unsigned int bp;
|
||||
unsigned int size;
|
||||
unsigned int flags;
|
||||
};
|
||||
|
||||
const char *ras_error_string[] = {
|
||||
"none",
|
||||
"parity",
|
||||
|
@ -130,6 +62,9 @@ const char *ras_block_string[] = {
|
|||
#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2
|
||||
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
|
||||
|
||||
/* inject address is 52 bits */
|
||||
#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52)
|
||||
|
||||
static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
|
||||
uint64_t offset, uint64_t size,
|
||||
struct amdgpu_bo **bo_ptr);
|
||||
|
@ -223,9 +158,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
|
|||
return -EINVAL;
|
||||
|
||||
data->head.block = block_id;
|
||||
data->head.type = memcmp("ue", err, 2) == 0 ?
|
||||
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE :
|
||||
AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
|
||||
/* only ue and ce errors are supported */
|
||||
if (!memcmp("ue", err, 2))
|
||||
data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
|
||||
else if (!memcmp("ce", err, 2))
|
||||
data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
data->op = op;
|
||||
|
||||
if (op == 2) {
|
||||
|
@ -310,7 +250,6 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
|
|||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
|
||||
struct ras_debug_if data;
|
||||
struct amdgpu_bo *bo;
|
||||
int ret = 0;
|
||||
|
||||
ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
|
||||
|
@ -328,17 +267,14 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
|
|||
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
|
||||
break;
|
||||
case 2:
|
||||
ret = amdgpu_ras_reserve_vram(adev,
|
||||
data.inject.address, PAGE_SIZE, &bo);
|
||||
if (ret) {
|
||||
/* address was offset, now it is absolute.*/
|
||||
data.inject.address += adev->gmc.vram_start;
|
||||
if (data.inject.address > adev->gmc.vram_end)
|
||||
break;
|
||||
} else
|
||||
data.inject.address = amdgpu_bo_gpu_offset(bo);
|
||||
if ((data.inject.address >= adev->gmc.mc_vram_size) ||
|
||||
(data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
/* data.inject.address is offset instead of absolute gpu address */
|
||||
ret = amdgpu_ras_error_inject(adev, &data.inject);
|
||||
amdgpu_ras_release_vram(adev, &bo);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
|
@ -656,14 +592,42 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
|
|||
struct ras_query_if *info)
|
||||
{
|
||||
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
|
||||
struct ras_err_data err_data = {0, 0, 0, NULL};
|
||||
|
||||
if (!obj)
|
||||
return -EINVAL;
|
||||
/* TODO might read the register to read the count */
|
||||
|
||||
switch (info->head.block) {
|
||||
case AMDGPU_RAS_BLOCK__UMC:
|
||||
if (adev->umc.funcs->query_ras_error_count)
|
||||
adev->umc.funcs->query_ras_error_count(adev, &err_data);
|
||||
/* umc query_ras_error_address is also responsible for clearing
|
||||
* error status
|
||||
*/
|
||||
if (adev->umc.funcs->query_ras_error_address)
|
||||
adev->umc.funcs->query_ras_error_address(adev, &err_data);
|
||||
break;
|
||||
case AMDGPU_RAS_BLOCK__GFX:
|
||||
if (adev->gfx.funcs->query_ras_error_count)
|
||||
adev->gfx.funcs->query_ras_error_count(adev, &err_data);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
obj->err_data.ue_count += err_data.ue_count;
|
||||
obj->err_data.ce_count += err_data.ce_count;
|
||||
|
||||
info->ue_count = obj->err_data.ue_count;
|
||||
info->ce_count = obj->err_data.ce_count;
|
||||
|
||||
if (err_data.ce_count)
|
||||
dev_info(adev->dev, "%ld correctable errors detected in %s block\n",
|
||||
obj->err_data.ce_count, ras_block_str(info->head.block));
|
||||
if (err_data.ue_count)
|
||||
dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n",
|
||||
obj->err_data.ue_count, ras_block_str(info->head.block));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
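A hedged sketch of a consumer of this query interface, for example from a sysfs or debugfs read path; the field names are the ones used in the hunk above:

	struct ras_query_if info = {
		.head = {
			.block = AMDGPU_RAS_BLOCK__UMC,
			.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
		},
	};

	if (!amdgpu_ras_error_query(adev, &info))
		DRM_INFO("ce %lu ue %lu\n", info.ce_count, info.ue_count);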
@ -684,13 +648,22 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
|
|||
if (!obj)
|
||||
return -EINVAL;
|
||||
|
||||
if (block_info.block_id != TA_RAS_BLOCK__UMC) {
|
||||
switch (info->head.block) {
|
||||
case AMDGPU_RAS_BLOCK__GFX:
|
||||
if (adev->gfx.funcs->ras_error_inject)
|
||||
ret = adev->gfx.funcs->ras_error_inject(adev, info);
|
||||
else
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
case AMDGPU_RAS_BLOCK__UMC:
|
||||
ret = psp_ras_trigger_error(&adev->psp, &block_info);
|
||||
break;
|
||||
default:
|
||||
DRM_INFO("%s error injection is not supported yet\n",
|
||||
ras_block_str(info->head.block));
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
ret = psp_ras_trigger_error(&adev->psp, &block_info);
|
||||
if (ret)
|
||||
DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n",
|
||||
ras_block_str(info->head.block),
|
||||
|
@ -816,25 +789,18 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
|
|||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
struct ras_common_if head;
|
||||
int ras_block_count = AMDGPU_RAS_BLOCK_COUNT;
|
||||
int i;
|
||||
int i, enabled;
|
||||
ssize_t s;
|
||||
struct ras_manager *obj;
|
||||
|
||||
s = scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
|
||||
|
||||
for (i = 0; i < ras_block_count; i++) {
|
||||
head.block = i;
|
||||
enabled = amdgpu_ras_is_feature_enabled(adev, &head);
|
||||
|
||||
if (amdgpu_ras_is_feature_enabled(adev, &head)) {
|
||||
obj = amdgpu_ras_find_obj(adev, &head);
|
||||
s += scnprintf(&buf[s], PAGE_SIZE - s,
|
||||
"%s: %s\n",
|
||||
ras_block_str(i),
|
||||
ras_err_str(obj->head.type));
|
||||
} else
|
||||
s += scnprintf(&buf[s], PAGE_SIZE - s,
|
||||
"%s: disabled\n",
|
||||
ras_block_str(i));
|
||||
s += scnprintf(&buf[s], PAGE_SIZE - s,
|
||||
"%s ras feature mask: %s\n",
|
||||
ras_block_str(i), enabled?"on":"off");
|
||||
}
|
||||
|
||||
return s;
|
||||
|
@ -1054,6 +1020,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
|
|||
struct ras_ih_data *data = &obj->ih_data;
|
||||
struct amdgpu_iv_entry entry;
|
||||
int ret;
|
||||
struct ras_err_data err_data = {0, 0, 0, NULL};
|
||||
|
||||
while (data->rptr != data->wptr) {
|
||||
rmb();
|
||||
|
@ -1068,19 +1035,19 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
|
|||
* from the callback to update the error type/count, etc.
|
||||
*/
|
||||
if (data->cb) {
|
||||
ret = data->cb(obj->adev, &entry);
|
||||
ret = data->cb(obj->adev, &err_data, &entry);
|
||||
/* ue will trigger an interrupt, and in that case
|
||||
* we need to do a reset to recover the whole system.
|
||||
* But leave it to the IP to do that recovery; here we just dispatch
|
||||
* the error.
|
||||
*/
|
||||
if (ret == AMDGPU_RAS_UE) {
|
||||
obj->err_data.ue_count++;
|
||||
if (ret == AMDGPU_RAS_SUCCESS) {
|
||||
/* these counts could be left as 0 if
|
||||
* some blocks do not count error numbers
|
||||
*/
|
||||
obj->err_data.ue_count += err_data.ue_count;
|
||||
obj->err_data.ce_count += err_data.ce_count;
|
||||
}
|
||||
/* Might need to get the ce count from a register, but not all IPs
|
||||
* save the ce count; some IPs just use one or two bits
|
||||
* to indicate that a ce happened.
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
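With the reworked ras_ih_cb the per-IP bottom half reports its counts through err_data instead of a magic return value, and the handler above folds them into obj->err_data. A hedged sketch of such a callback; the decoding is block specific and only hinted at:

static int example_ras_ih_cb(struct amdgpu_device *adev,
			     struct ras_err_data *err_data,
			     struct amdgpu_iv_entry *entry)
{
	/* a real callback decodes entry and/or reads status registers and
	 * only bumps the counters for errors it actually found
	 */
	err_data->ue_count++;

	return AMDGPU_RAS_SUCCESS;
}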
@ -1577,6 +1544,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
|
|||
if (amdgpu_ras_fs_init(adev))
|
||||
goto fs_out;
|
||||
|
||||
/* ras init for each ras block */
|
||||
if (adev->umc.funcs->ras_init)
|
||||
adev->umc.funcs->ras_init(adev);
|
||||
|
||||
DRM_INFO("RAS INFO: ras initialized successfully, "
|
||||
"hardware ability[%x] ras_mask[%x]\n",
|
||||
con->hw_supported, con->supported);
|
||||
|
|
|
@ -52,6 +52,236 @@ enum amdgpu_ras_block {
|
|||
#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST
|
||||
#define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
|
||||
|
||||
enum amdgpu_ras_gfx_subblock {
|
||||
/* CPC */
|
||||
AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
|
||||
AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
|
||||
AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
|
||||
AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
|
||||
AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
|
||||
AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
|
||||
AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
|
||||
AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
|
||||
AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
|
||||
AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
|
||||
/* CPF */
|
||||
AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
|
||||
AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
|
||||
AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
|
||||
AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
|
||||
/* CPG */
|
||||
AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
|
||||
AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
|
||||
AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
|
||||
AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
|
||||
/* GDS */
|
||||
AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
|
||||
AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
|
||||
/* SPI */
|
||||
AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
|
||||
/* SQ */
|
||||
AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
|
||||
/* SQC (3 ranges) */
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
|
||||
/* SQC range 0 */
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
|
||||
/* SQC range 1 */
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
|
||||
/* SQC range 2 */
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
|
||||
/* TA */
|
||||
AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
|
||||
AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
|
||||
/* TCA */
|
||||
AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
|
||||
/* TCC (5 sub-ranges) */
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
|
||||
/* TCC range 0 */
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
|
||||
/* TCC range 1 */
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
|
||||
/* TCC range 2 */
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
|
||||
/* TCC range 3 */
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
|
||||
/* TCC range 4 */
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
|
||||
/* TCI */
|
||||
AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
|
||||
/* TCP */
|
||||
AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
|
||||
AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
|
||||
/* TD */
|
||||
AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
|
||||
AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
|
||||
AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
|
||||
AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
|
||||
/* EA (3 sub-ranges) */
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
|
||||
/* EA range 0 */
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
|
||||
/* EA range 1 */
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
|
||||
/* EA range 2 */
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
|
||||
AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
|
||||
/* UTC VM L2 bank */
|
||||
AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
|
||||
/* UTC VM walker */
|
||||
AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
|
||||
/* UTC ATC L2 2MB cache */
|
||||
AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
|
||||
/* UTC ATC L2 4KB cache */
|
||||
AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
|
||||
AMDGPU_RAS_BLOCK__GFX_MAX
|
||||
};
|
||||
|
||||
enum amdgpu_ras_error_type {
|
||||
AMDGPU_RAS_ERROR__NONE = 0,
|
||||
AMDGPU_RAS_ERROR__PARITY = 1,
|
||||
|
@ -76,9 +306,6 @@ struct ras_common_if {
|
|||
char name[32];
|
||||
};
|
||||
|
||||
typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
|
||||
struct amdgpu_ras {
|
||||
/* ras infrastructure */
|
||||
/* for ras itself. */
|
||||
|
@ -108,8 +335,81 @@ struct amdgpu_ras {
|
|||
uint32_t flags;
|
||||
};
|
||||
|
||||
/* interfaces for IP */
|
||||
struct ras_fs_data {
|
||||
char sysfs_name[32];
|
||||
char debugfs_name[32];
|
||||
};
|
||||
|
||||
struct ras_err_data {
|
||||
unsigned long ue_count;
|
||||
unsigned long ce_count;
|
||||
unsigned long err_addr_cnt;
|
||||
uint64_t *err_addr;
|
||||
};
|
||||
|
||||
struct ras_err_handler_data {
|
||||
/* point to bad pages array */
|
||||
struct {
|
||||
unsigned long bp;
|
||||
struct amdgpu_bo *bo;
|
||||
} *bps;
|
||||
/* the count of entries */
|
||||
int count;
|
||||
/* the space can place new entries */
|
||||
int space_left;
|
||||
/* last reserved entry's index + 1 */
|
||||
int last_reserved;
|
||||
};
|
||||
|
||||
typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
|
||||
struct ras_err_data *err_data,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
|
||||
struct ras_ih_data {
|
||||
/* interrupt bottom half */
|
||||
struct work_struct ih_work;
|
||||
int inuse;
|
||||
/* IP callback */
|
||||
ras_ih_cb cb;
|
||||
/* full of entries */
|
||||
unsigned char *ring;
|
||||
unsigned int ring_size;
|
||||
unsigned int element_size;
|
||||
unsigned int aligned_element_size;
|
||||
unsigned int rptr;
|
||||
unsigned int wptr;
|
||||
};
|
||||
|
||||
struct ras_manager {
|
||||
struct ras_common_if head;
|
||||
/* reference count */
|
||||
int use;
|
||||
/* ras block link */
|
||||
struct list_head node;
|
||||
/* the device */
|
||||
struct amdgpu_device *adev;
|
||||
/* debugfs */
|
||||
struct dentry *ent;
|
||||
/* sysfs */
|
||||
struct device_attribute sysfs_attr;
|
||||
int attr_inuse;
|
||||
|
||||
/* fs node name */
|
||||
struct ras_fs_data fs_data;
|
||||
|
||||
/* IH data */
|
||||
struct ras_ih_data ih_data;
|
||||
|
||||
struct ras_err_data err_data;
|
||||
};
|
||||
|
||||
struct ras_badpage {
|
||||
unsigned int bp;
|
||||
unsigned int size;
|
||||
unsigned int flags;
|
||||
};
|
||||
|
||||
/* interfaces for IP */
|
||||
struct ras_fs_if {
|
||||
struct ras_common_if head;
|
||||
char sysfs_name[32];
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
#include <drm/drm_print.h>
|
||||
|
||||
/* max number of rings */
|
||||
#define AMDGPU_MAX_RINGS 24
|
||||
#define AMDGPU_MAX_RINGS 28
|
||||
#define AMDGPU_MAX_GFX_RINGS 2
|
||||
#define AMDGPU_MAX_COMPUTE_RINGS 8
|
||||
#define AMDGPU_MAX_VCE_RINGS 3
|
||||
|
|
|
@ -25,11 +25,17 @@
|
|||
#define __AMDGPU_SDMA_H__
|
||||
|
||||
/* max number of IP instances */
|
||||
#define AMDGPU_MAX_SDMA_INSTANCES 2
|
||||
#define AMDGPU_MAX_SDMA_INSTANCES 8
|
||||
|
||||
enum amdgpu_sdma_irq {
|
||||
AMDGPU_SDMA_IRQ_INSTANCE0 = 0,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE1,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE2,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE3,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE4,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE5,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE6,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE7,
|
||||
AMDGPU_SDMA_IRQ_LAST
|
||||
};
|
||||
|
||||
|
|
|
@ -444,6 +444,22 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
|
|||
if (r)
|
||||
goto error;
|
||||
|
||||
/* clear the space being freed */
|
||||
if (old_mem->mem_type == TTM_PL_VRAM &&
|
||||
(ttm_to_amdgpu_bo(bo)->flags &
|
||||
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
|
||||
struct dma_fence *wipe_fence = NULL;
|
||||
|
||||
r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
|
||||
NULL, &wipe_fence);
|
||||
if (r) {
|
||||
goto error;
|
||||
} else if (wipe_fence) {
|
||||
dma_fence_put(fence);
|
||||
fence = wipe_fence;
|
||||
}
|
||||
}
|
||||
|
||||
/* Always block for VM page tables before committing the new location */
|
||||
if (bo->type == ttm_bo_type_kernel)
|
||||
r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem);
|
||||
|
@ -1599,6 +1615,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
|
|||
.move = &amdgpu_bo_move,
|
||||
.verify_access = &amdgpu_verify_access,
|
||||
.move_notify = &amdgpu_bo_move_notify,
|
||||
.release_notify = &amdgpu_bo_release_notify,
|
||||
.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
|
||||
.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
|
||||
.io_mem_free = &amdgpu_ttm_io_mem_free,
|
||||
|
|
|
@ -38,6 +38,8 @@
|
|||
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
|
||||
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
|
||||
|
||||
#define AMDGPU_POISON 0xd0bed0be
|
||||
|
||||
struct amdgpu_mman {
|
||||
struct ttm_bo_device bdev;
|
||||
bool mem_global_referenced;
|
||||
|
|
|
@ -269,6 +269,16 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
|
|||
DRM_DEBUG("kdb_size_bytes: %u\n",
|
||||
le32_to_cpu(psp_hdr_v1_1->kdb_size_bytes));
|
||||
}
|
||||
if (version_minor == 2) {
|
||||
const struct psp_firmware_header_v1_2 *psp_hdr_v1_2 =
|
||||
container_of(psp_hdr, struct psp_firmware_header_v1_2, v1_0);
|
||||
DRM_DEBUG("kdb_header_version: %u\n",
|
||||
le32_to_cpu(psp_hdr_v1_2->kdb_header_version));
|
||||
DRM_DEBUG("kdb_offset_bytes: %u\n",
|
||||
le32_to_cpu(psp_hdr_v1_2->kdb_offset_bytes));
|
||||
DRM_DEBUG("kdb_size_bytes: %u\n",
|
||||
le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes));
|
||||
}
|
||||
} else {
|
||||
DRM_ERROR("Unknown PSP ucode version: %u.%u\n",
|
||||
version_major, version_minor);
|
||||
|
@ -351,10 +361,14 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
|
|||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
if (!load_type)
|
||||
return AMDGPU_FW_LOAD_DIRECT;
|
||||
else
|
||||
return AMDGPU_FW_LOAD_PSP;
|
||||
case CHIP_ARCTURUS:
|
||||
return AMDGPU_FW_LOAD_DIRECT;
|
||||
default:
|
||||
DRM_ERROR("Unknown firmware load type\n");
|
||||
}
|
||||
|
|
|
@ -90,6 +90,15 @@ struct psp_firmware_header_v1_1 {
|
|||
uint32_t kdb_size_bytes;
|
||||
};
|
||||
|
||||
/* version_major=1, version_minor=2 */
|
||||
struct psp_firmware_header_v1_2 {
|
||||
struct psp_firmware_header_v1_0 v1_0;
|
||||
uint32_t reserve[3];
|
||||
uint32_t kdb_header_version;
|
||||
uint32_t kdb_offset_bytes;
|
||||
uint32_t kdb_size_bytes;
|
||||
};
|
||||
|
||||
/* version_major=1, version_minor=0 */
|
||||
struct ta_firmware_header_v1_0 {
|
||||
struct common_firmware_header header;
|
||||
|
@ -262,6 +271,12 @@ union amdgpu_firmware_header {
|
|||
enum AMDGPU_UCODE_ID {
|
||||
AMDGPU_UCODE_ID_SDMA0 = 0,
|
||||
AMDGPU_UCODE_ID_SDMA1,
|
||||
AMDGPU_UCODE_ID_SDMA2,
|
||||
AMDGPU_UCODE_ID_SDMA3,
|
||||
AMDGPU_UCODE_ID_SDMA4,
|
||||
AMDGPU_UCODE_ID_SDMA5,
|
||||
AMDGPU_UCODE_ID_SDMA6,
|
||||
AMDGPU_UCODE_ID_SDMA7,
|
||||
AMDGPU_UCODE_ID_CP_CE,
|
||||
AMDGPU_UCODE_ID_CP_PFP,
|
||||
AMDGPU_UCODE_ID_CP_ME,
|
||||
|
@ -281,6 +296,7 @@ enum AMDGPU_UCODE_ID {
|
|||
AMDGPU_UCODE_ID_UVD1,
|
||||
AMDGPU_UCODE_ID_VCE,
|
||||
AMDGPU_UCODE_ID_VCN,
|
||||
AMDGPU_UCODE_ID_VCN1,
|
||||
AMDGPU_UCODE_ID_DMCU_ERAM,
|
||||
AMDGPU_UCODE_ID_DMCU_INTV,
|
||||
AMDGPU_UCODE_ID_VCN0_RAM,
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
* Copyright (C) 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef __AMDGPU_UMC_H__
|
||||
#define __AMDGPU_UMC_H__
|
||||
|
||||
/* implement 64 bits REG operations via 32 bits interface */
|
||||
#define RREG64_UMC(reg) (RREG32(reg) | \
|
||||
((uint64_t)RREG32((reg) + 1) << 32))
|
||||
#define WREG64_UMC(reg, v) \
|
||||
do { \
|
||||
WREG32((reg), lower_32_bits(v)); \
|
||||
WREG32((reg) + 1, upper_32_bits(v)); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* void (*func)(struct amdgpu_device *adev, struct ras_err_data *err_data,
|
||||
* uint32_t umc_reg_offset, uint32_t channel_index)
|
||||
*/
|
||||
#define amdgpu_umc_for_each_channel(func) \
|
||||
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; \
|
||||
uint32_t umc_inst, channel_inst, umc_reg_offset, channel_index; \
|
||||
for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) { \
|
||||
/* enable the index mode to query error count per channel */ \
|
||||
adev->umc.funcs->enable_umc_index_mode(adev, umc_inst); \
|
||||
for (channel_inst = 0; \
|
||||
channel_inst < adev->umc.channel_inst_num; \
|
||||
channel_inst++) { \
|
||||
/* calc the register offset according to channel instance */ \
|
||||
umc_reg_offset = adev->umc.channel_offs * channel_inst; \
|
||||
/* get channel index of interleaved memory */ \
|
||||
channel_index = adev->umc.channel_idx_tbl[ \
|
||||
umc_inst * adev->umc.channel_inst_num + channel_inst]; \
|
||||
(func)(adev, err_data, umc_reg_offset, channel_index); \
|
||||
} \
|
||||
} \
|
||||
adev->umc.funcs->disable_umc_index_mode(adev);
|
||||
|
||||
struct amdgpu_umc_funcs {
|
||||
void (*ras_init)(struct amdgpu_device *adev);
|
||||
void (*query_ras_error_count)(struct amdgpu_device *adev,
|
||||
void *ras_error_status);
|
||||
void (*query_ras_error_address)(struct amdgpu_device *adev,
|
||||
void *ras_error_status);
|
||||
void (*enable_umc_index_mode)(struct amdgpu_device *adev,
|
||||
uint32_t umc_instance);
|
||||
void (*disable_umc_index_mode)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
struct amdgpu_umc {
|
||||
/* max error count in one ras query call */
|
||||
uint32_t max_ras_err_cnt_per_query;
|
||||
/* number of UMC channel instances with memory-mapped register access */
uint32_t channel_inst_num;
/* number of UMC instances with memory-mapped register access */
uint32_t umc_inst_num;
/* UMC register offset per channel */
|
||||
uint32_t channel_offs;
|
||||
/* channel index table of interleaved memory */
|
||||
const uint32_t *channel_idx_tbl;
|
||||
|
||||
const struct amdgpu_umc_funcs *funcs;
|
||||
};
|
||||
|
||||
#endif
@ -46,12 +46,18 @@
|
|||
#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
|
||||
#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin"
|
||||
#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin"
|
||||
#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin"
|
||||
#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin"
|
||||
#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin"
|
||||
#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin"
|
||||
|
||||
MODULE_FIRMWARE(FIRMWARE_RAVEN);
|
||||
MODULE_FIRMWARE(FIRMWARE_PICASSO);
|
||||
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
|
||||
MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
|
||||
MODULE_FIRMWARE(FIRMWARE_NAVI10);
|
||||
MODULE_FIRMWARE(FIRMWARE_NAVI14);
|
||||
MODULE_FIRMWARE(FIRMWARE_NAVI12);
|
||||
|
||||
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
|
||||
|
||||
|
@@ -61,7 +67,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
|
|||
const char *fw_name;
|
||||
const struct common_firmware_header *hdr;
|
||||
unsigned char fw_check;
|
||||
int r;
|
||||
int i, r;
|
||||
|
||||
INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
|
||||
|
||||
|
@@ -74,12 +80,27 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
|
|||
else
|
||||
fw_name = FIRMWARE_RAVEN;
|
||||
break;
|
||||
case CHIP_ARCTURUS:
|
||||
fw_name = FIRMWARE_ARCTURUS;
|
||||
break;
|
||||
case CHIP_NAVI10:
|
||||
fw_name = FIRMWARE_NAVI10;
|
||||
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
|
||||
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
|
||||
adev->vcn.indirect_sram = true;
|
||||
break;
|
||||
case CHIP_NAVI14:
|
||||
fw_name = FIRMWARE_NAVI14;
|
||||
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
|
||||
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
|
||||
adev->vcn.indirect_sram = true;
|
||||
break;
|
||||
case CHIP_NAVI12:
|
||||
fw_name = FIRMWARE_NAVI12;
|
||||
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
|
||||
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
|
||||
adev->vcn.indirect_sram = true;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@@ -133,12 +154,18 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
|
|||
bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
|
||||
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
|
||||
&adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
|
||||
return r;
|
||||
|
||||
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
|
||||
if (adev->vcn.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
|
||||
&adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
if (adev->vcn.indirect_sram) {
|
||||
|
@@ -156,26 +183,30 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
|
|||
|
||||
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
|
||||
kvfree(adev->vcn.saved_bo);
|
||||
int i, j;
|
||||
|
||||
if (adev->vcn.indirect_sram) {
|
||||
amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo,
|
||||
&adev->vcn.dpg_sram_gpu_addr,
|
||||
(void **)&adev->vcn.dpg_sram_cpu_addr);
|
||||
&adev->vcn.dpg_sram_gpu_addr,
|
||||
(void **)&adev->vcn.dpg_sram_cpu_addr);
|
||||
}
|
||||
|
||||
amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
|
||||
&adev->vcn.gpu_addr,
|
||||
(void **)&adev->vcn.cpu_addr);
|
||||
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
|
||||
if (adev->vcn.harvest_config & (1 << j))
|
||||
continue;
|
||||
kvfree(adev->vcn.inst[j].saved_bo);
|
||||
|
||||
amdgpu_ring_fini(&adev->vcn.ring_dec);
|
||||
amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
|
||||
&adev->vcn.inst[j].gpu_addr,
|
||||
(void **)&adev->vcn.inst[j].cpu_addr);
|
||||
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i)
|
||||
amdgpu_ring_fini(&adev->vcn.ring_enc[i]);
|
||||
amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);
|
||||
|
||||
amdgpu_ring_fini(&adev->vcn.ring_jpeg);
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i)
|
||||
amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
|
||||
|
||||
amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg);
|
||||
}
|
||||
|
||||
release_firmware(adev->vcn.fw);
|
||||
|
||||
|
@@ -186,21 +217,25 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
|
|||
{
|
||||
unsigned size;
|
||||
void *ptr;
|
||||
int i;
|
||||
|
||||
cancel_delayed_work_sync(&adev->vcn.idle_work);
|
||||
|
||||
if (adev->vcn.vcpu_bo == NULL)
|
||||
return 0;
|
||||
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
|
||||
if (adev->vcn.harvest_config & (1 << i))
|
||||
continue;
|
||||
if (adev->vcn.inst[i].vcpu_bo == NULL)
|
||||
return 0;
|
||||
|
||||
size = amdgpu_bo_size(adev->vcn.vcpu_bo);
|
||||
ptr = adev->vcn.cpu_addr;
|
||||
size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
|
||||
ptr = adev->vcn.inst[i].cpu_addr;
|
||||
|
||||
adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL);
|
||||
if (!adev->vcn.saved_bo)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy_fromio(adev->vcn.saved_bo, ptr, size);
|
||||
adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
|
||||
if (!adev->vcn.inst[i].saved_bo)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -208,32 +243,36 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
|
|||
{
|
||||
unsigned size;
|
||||
void *ptr;
|
||||
int i;
|
||||
|
||||
if (adev->vcn.vcpu_bo == NULL)
|
||||
return -EINVAL;
|
||||
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
|
||||
if (adev->vcn.harvest_config & (1 << i))
|
||||
continue;
|
||||
if (adev->vcn.inst[i].vcpu_bo == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
size = amdgpu_bo_size(adev->vcn.vcpu_bo);
|
||||
ptr = adev->vcn.cpu_addr;
|
||||
size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
|
||||
ptr = adev->vcn.inst[i].cpu_addr;
|
||||
|
||||
if (adev->vcn.saved_bo != NULL) {
|
||||
memcpy_toio(ptr, adev->vcn.saved_bo, size);
|
||||
kvfree(adev->vcn.saved_bo);
|
||||
adev->vcn.saved_bo = NULL;
|
||||
} else {
|
||||
const struct common_firmware_header *hdr;
|
||||
unsigned offset;
|
||||
if (adev->vcn.inst[i].saved_bo != NULL) {
|
||||
memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
|
||||
kvfree(adev->vcn.inst[i].saved_bo);
|
||||
adev->vcn.inst[i].saved_bo = NULL;
|
||||
} else {
|
||||
const struct common_firmware_header *hdr;
|
||||
unsigned offset;
|
||||
|
||||
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
|
||||
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
|
||||
memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
|
||||
le32_to_cpu(hdr->ucode_size_bytes));
|
||||
size -= le32_to_cpu(hdr->ucode_size_bytes);
|
||||
ptr += le32_to_cpu(hdr->ucode_size_bytes);
|
||||
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
|
||||
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
|
||||
memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
|
||||
le32_to_cpu(hdr->ucode_size_bytes));
|
||||
size -= le32_to_cpu(hdr->ucode_size_bytes);
|
||||
ptr += le32_to_cpu(hdr->ucode_size_bytes);
|
||||
}
|
||||
memset_io(ptr, 0, size);
|
||||
}
|
||||
memset_io(ptr, 0, size);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -241,35 +280,40 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
|
|||
{
|
||||
struct amdgpu_device *adev =
|
||||
container_of(work, struct amdgpu_device, vcn.idle_work.work);
|
||||
unsigned int fences = 0;
|
||||
unsigned int i;
|
||||
unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
|
||||
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
|
||||
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
|
||||
if (adev->vcn.harvest_config & (1 << j))
|
||||
continue;
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
|
||||
fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
|
||||
}
|
||||
|
||||
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
|
||||
struct dpg_pause_state new_state;
|
||||
|
||||
if (fence[j])
|
||||
new_state.fw_based = VCN_DPG_STATE__PAUSE;
|
||||
else
|
||||
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
|
||||
|
||||
if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg))
|
||||
new_state.jpeg = VCN_DPG_STATE__PAUSE;
|
||||
else
|
||||
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
|
||||
|
||||
adev->vcn.pause_dpg_mode(adev, &new_state);
|
||||
}
|
||||
|
||||
fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg);
|
||||
fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
|
||||
fences += fence[j];
|
||||
}
|
||||
|
||||
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
|
||||
struct dpg_pause_state new_state;
|
||||
|
||||
if (fences)
|
||||
new_state.fw_based = VCN_DPG_STATE__PAUSE;
|
||||
else
|
||||
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
|
||||
|
||||
if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
|
||||
new_state.jpeg = VCN_DPG_STATE__PAUSE;
|
||||
else
|
||||
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
|
||||
|
||||
adev->vcn.pause_dpg_mode(adev, &new_state);
|
||||
}
|
||||
|
||||
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
|
||||
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
|
||||
|
||||
if (fences == 0) {
|
||||
amdgpu_gfx_off_ctrl(adev, true);
|
||||
if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled)
|
||||
if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled)
|
||||
amdgpu_dpm_enable_uvd(adev, false);
|
||||
else
|
||||
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
|
||||
|
@@ -286,7 +330,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
|
|||
|
||||
if (set_clocks) {
|
||||
amdgpu_gfx_off_ctrl(adev, false);
|
||||
if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled)
|
||||
if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled)
|
||||
amdgpu_dpm_enable_uvd(adev, true);
|
||||
else
|
||||
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
|
||||
|
@@ -299,14 +343,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
|
|||
unsigned int i;
|
||||
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
|
||||
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
|
||||
fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
|
||||
}
|
||||
if (fences)
|
||||
new_state.fw_based = VCN_DPG_STATE__PAUSE;
|
||||
else
|
||||
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
|
||||
|
||||
if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
|
||||
if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg))
|
||||
new_state.jpeg = VCN_DPG_STATE__PAUSE;
|
||||
else
|
||||
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
|
||||
|
@@ -332,7 +376,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
|
|||
unsigned i;
|
||||
int r;
|
||||
|
||||
WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD);
|
||||
WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r)
|
||||
return r;
|
||||
|
@@ -340,7 +384,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
|
|||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
amdgpu_ring_commit(ring);
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
tmp = RREG32(adev->vcn.external.scratch9);
|
||||
tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
break;
|
||||
udelay(1);
|
||||
|
@@ -651,7 +695,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
|
|||
unsigned i;
|
||||
int r;
|
||||
|
||||
WREG32(adev->vcn.external.jpeg_pitch, 0xCAFEDEAD);
|
||||
WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r)
|
||||
return r;
|
||||
|
@@ -661,7 +705,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
|
|||
amdgpu_ring_commit(ring);
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
tmp = RREG32(adev->vcn.external.jpeg_pitch);
|
||||
tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
break;
|
||||
udelay(1);
|
||||
|
@@ -735,7 +779,7 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|||
}
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
tmp = RREG32(adev->vcn.external.jpeg_pitch);
|
||||
tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
break;
|
||||
udelay(1);
@ -30,6 +30,12 @@
|
|||
#define AMDGPU_VCN_FIRMWARE_OFFSET 256
|
||||
#define AMDGPU_VCN_MAX_ENC_RINGS 3
|
||||
|
||||
#define AMDGPU_MAX_VCN_INSTANCES 2
|
||||
|
||||
#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
|
||||
#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1)
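Code in this series that touches VCN iterates over every instance and skips the ones flagged in harvest_config. A minimal stand-alone sketch of that bitmask check (the harvest value is illustrative):

#include <stdio.h>

#define VCN_HARVEST_VCN0  (1 << 0)   /* mirrors AMDGPU_VCN_HARVEST_VCN0 */
#define VCN_HARVEST_VCN1  (1 << 1)   /* mirrors AMDGPU_VCN_HARVEST_VCN1 */
#define MAX_VCN_INSTANCES 2

int main(void)
{
    unsigned int harvest_config = VCN_HARVEST_VCN1; /* pretend VCN1 is fused off */
    int i;

    for (i = 0; i < MAX_VCN_INSTANCES; i++) {
        if (harvest_config & (1 << i))
            continue;   /* skip harvested instances, as amdgpu_vcn.c does */
        printf("initializing VCN instance %d\n", i);
    }
    return 0;
}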
|
||||
|
||||
#define VCN_DEC_KMD_CMD 0x80000000
|
||||
#define VCN_DEC_CMD_FENCE 0x00000000
|
||||
#define VCN_DEC_CMD_TRAP 0x00000001
|
||||
#define VCN_DEC_CMD_WRITE_REG 0x00000004
|
||||
|
@ -145,34 +151,49 @@ struct amdgpu_vcn_reg{
|
|||
unsigned data1;
|
||||
unsigned cmd;
|
||||
unsigned nop;
|
||||
unsigned context_id;
|
||||
unsigned ib_vmid;
|
||||
unsigned ib_bar_low;
|
||||
unsigned ib_bar_high;
|
||||
unsigned ib_size;
|
||||
unsigned gp_scratch8;
|
||||
unsigned scratch9;
|
||||
unsigned jpeg_pitch;
|
||||
};
|
||||
|
||||
struct amdgpu_vcn {
|
||||
struct amdgpu_vcn_inst {
|
||||
struct amdgpu_bo *vcpu_bo;
|
||||
void *cpu_addr;
|
||||
uint64_t gpu_addr;
|
||||
unsigned fw_version;
|
||||
void *saved_bo;
|
||||
struct delayed_work idle_work;
|
||||
const struct firmware *fw; /* VCN firmware */
|
||||
struct amdgpu_ring ring_dec;
|
||||
struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
|
||||
struct amdgpu_ring ring_jpeg;
|
||||
struct amdgpu_irq_src irq;
|
||||
struct amdgpu_vcn_reg external;
|
||||
};
|
||||
|
||||
struct amdgpu_vcn {
|
||||
unsigned fw_version;
|
||||
struct delayed_work idle_work;
|
||||
const struct firmware *fw; /* VCN firmware */
|
||||
unsigned num_enc_rings;
|
||||
enum amd_powergating_state cur_state;
|
||||
struct dpg_pause_state pause_state;
|
||||
struct amdgpu_vcn_reg internal, external;
|
||||
int (*pause_dpg_mode)(struct amdgpu_device *adev,
|
||||
struct dpg_pause_state *new_state);
|
||||
|
||||
bool indirect_sram;
|
||||
struct amdgpu_bo *dpg_sram_bo;
|
||||
void *dpg_sram_cpu_addr;
|
||||
uint64_t dpg_sram_gpu_addr;
|
||||
uint32_t *dpg_sram_curr_addr;
|
||||
|
||||
uint8_t num_vcn_inst;
|
||||
struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
|
||||
struct amdgpu_vcn_reg internal;
|
||||
|
||||
unsigned harvest_config;
|
||||
int (*pause_dpg_mode)(struct amdgpu_device *adev,
|
||||
struct dpg_pause_state *new_state);
|
||||
};
|
||||
|
||||
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
@ -430,48 +430,3 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)
|
|||
|
||||
return clk;
|
||||
}
|
||||
|
||||
void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_virt *virt = &adev->virt;
|
||||
|
||||
if (virt->ops && virt->ops->init_reg_access_mode)
|
||||
virt->ops->init_reg_access_mode(adev);
|
||||
}
|
||||
|
||||
bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev)
|
||||
{
|
||||
bool ret = false;
|
||||
struct amdgpu_virt *virt = &adev->virt;
|
||||
|
||||
if (amdgpu_sriov_vf(adev)
|
||||
&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH))
|
||||
ret = true;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev)
|
||||
{
|
||||
bool ret = false;
|
||||
struct amdgpu_virt *virt = &adev->virt;
|
||||
|
||||
if (amdgpu_sriov_vf(adev)
|
||||
&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC)
|
||||
&& !(amdgpu_sriov_runtime(adev)))
|
||||
ret = true;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev)
|
||||
{
|
||||
bool ret = false;
|
||||
struct amdgpu_virt *virt = &adev->virt;
|
||||
|
||||
if (amdgpu_sriov_vf(adev)
|
||||
&& (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING))
|
||||
ret = true;
|
||||
|
||||
return ret;
|
||||
}
@ -48,12 +48,6 @@ struct amdgpu_vf_error_buffer {
|
|||
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
|
||||
};
|
||||
|
||||
/* According to the fw feature, some new reg access modes are supported */
|
||||
#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */
|
||||
#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */
|
||||
#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */
|
||||
#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */
|
||||
|
||||
/**
|
||||
* struct amdgpu_virt_ops - amdgpu device virt operations
|
||||
*/
|
||||
|
@ -65,7 +59,6 @@ struct amdgpu_virt_ops {
|
|||
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
|
||||
int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
|
||||
int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
|
||||
void (*init_reg_access_mode)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -315,10 +308,4 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
|
|||
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
|
||||
uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
|
||||
uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
|
||||
|
||||
void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev);
|
||||
bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev);
|
||||
bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev);
|
||||
bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1574,7 +1574,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
|
|||
flags &= ~AMDGPU_PTE_EXECUTABLE;
|
||||
flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
|
||||
|
||||
if (adev->asic_type == CHIP_NAVI10) {
|
||||
if (adev->asic_type >= CHIP_NAVI10) {
|
||||
flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
|
||||
flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
|
||||
} else {
|
||||
|
@ -3061,12 +3061,12 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
|||
switch (args->in.op) {
|
||||
case AMDGPU_VM_OP_RESERVE_VMID:
|
||||
/* currently, we only need to reserve a vmid from the gfxhub */
|
||||
r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
|
||||
r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
|
||||
if (r)
|
||||
return r;
|
||||
break;
|
||||
case AMDGPU_VM_OP_UNRESERVE_VMID:
|
||||
amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
|
||||
amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
|
|
|
@ -90,7 +90,7 @@ struct amdgpu_bo_list_entry;
|
|||
| AMDGPU_PTE_WRITEABLE \
|
||||
| AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC))
|
||||
|
||||
/* NAVI10 only */
|
||||
/* gfx10 */
|
||||
#define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48)
|
||||
#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL)
|
||||
|
||||
|
@ -100,9 +100,10 @@ struct amdgpu_bo_list_entry;
|
|||
#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
|
||||
|
||||
/* max number of VMHUB */
|
||||
#define AMDGPU_MAX_VMHUBS 2
|
||||
#define AMDGPU_GFXHUB 0
|
||||
#define AMDGPU_MMHUB 1
|
||||
#define AMDGPU_MAX_VMHUBS 3
|
||||
#define AMDGPU_GFXHUB_0 0
|
||||
#define AMDGPU_MMHUB_0 1
|
||||
#define AMDGPU_MMHUB_1 2
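Per-hub state such as the VMID managers is indexed by these constants, and each ring records its hub in ring->vmhub. A minimal stand-alone sketch, using a hypothetical vmid_mgr type in place of the driver's:

#include <stdio.h>

#define AMDGPU_MAX_VMHUBS 3
#define AMDGPU_GFXHUB_0   0
#define AMDGPU_MMHUB_0    1
#define AMDGPU_MMHUB_1    2

struct vmid_mgr { int num_ids; };   /* stand-in for the driver's per-hub VMID manager */

int main(void)
{
    struct vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS] = {
        [AMDGPU_GFXHUB_0] = { 16 },
        [AMDGPU_MMHUB_0]  = { 16 },
        [AMDGPU_MMHUB_1]  = { 16 },
    };
    int hub;

    for (hub = 0; hub < AMDGPU_MAX_VMHUBS; hub++)
        printf("hub %d has %d VMIDs\n", hub, id_mgr[hub].num_ids);
    return 0;
}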
|
||||
|
||||
/* hardcode that limit for now */
|
||||
#define AMDGPU_VA_RESERVED_SIZE (1ULL << 20)
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include "amdgpu.h"
|
||||
#include "amdgpu_xgmi.h"
|
||||
#include "amdgpu_smu.h"
|
||||
|
||||
#include "df/df_3_6_offset.h"
|
||||
|
||||
static DEFINE_MUTEX(xgmi_mutex);
|
||||
|
||||
|
@ -131,9 +131,37 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
|
|||
|
||||
}
|
||||
|
||||
#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801)
|
||||
static ssize_t amdgpu_xgmi_show_error(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
uint32_t ficaa_pie_ctl_in, ficaa_pie_status_in;
|
||||
uint64_t fica_out;
|
||||
unsigned int error_count = 0;
|
||||
|
||||
ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200);
|
||||
ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208);
|
||||
|
||||
fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in);
|
||||
if (fica_out != 0x1f)
|
||||
pr_err("xGMI error counters not enabled!\n");
|
||||
|
||||
fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in);
|
||||
|
||||
if ((fica_out & 0xffff) == 2)
|
||||
error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63);
|
||||
|
||||
adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", error_count);
|
||||
}
|
||||
|
||||
|
||||
static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL);
|
||||
|
||||
static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
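From user space the new attribute is an ordinary sysfs file. A hedged example of reading it; the card0 path is an assumption and varies per system:

#include <stdio.h>

int main(void)
{
    /* path is illustrative; the attribute lives on the GPU's PCI device node */
    const char *path = "/sys/class/drm/card0/device/xgmi_error";
    char buf[32];
    FILE *f = fopen(path, "r");

    if (!f) {
        perror("fopen");
        return 1;
    }
    if (fgets(buf, sizeof(buf), f))
        printf("accumulated xGMI error count: %s", buf);
    fclose(f);
    return 0;
}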
|
||||
|
||||
static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
|
||||
struct amdgpu_hive_info *hive)
|
||||
|
@ -148,6 +176,12 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/* Create xgmi error file */
|
||||
ret = device_create_file(adev->dev, &dev_attr_xgmi_error);
|
||||
if (ret)
|
||||
pr_err("failed to create xgmi_error\n");
|
||||
|
||||
|
||||
/* Create sysfs link to hive info folder on the first device */
|
||||
if (adev != hive->adev) {
|
||||
ret = sysfs_create_link(&adev->dev->kobj, hive->kobj,
|
||||
|
@ -248,7 +282,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
|
|||
|
||||
dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
|
||||
|
||||
if (is_support_sw_smu(adev))
|
||||
if (is_support_sw_smu_xgmi(adev))
|
||||
ret = smu_set_xgmi_pstate(&adev->smu, pstate);
|
||||
if (ret)
|
||||
dev_err(adev->dev,
|
||||
|
@ -296,23 +330,28 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
|||
struct amdgpu_xgmi *entry;
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
|
||||
int count = 0, ret = -EINVAL;
|
||||
int count = 0, ret = 0;
|
||||
|
||||
if (!adev->gmc.xgmi.supported)
|
||||
return 0;
|
||||
|
||||
ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id);
|
||||
if (ret) {
|
||||
dev_err(adev->dev,
|
||||
"XGMI: Failed to get node id\n");
|
||||
return ret;
|
||||
}
|
||||
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
|
||||
ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id);
|
||||
if (ret) {
|
||||
dev_err(adev->dev,
|
||||
"XGMI: Failed to get hive id\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id);
|
||||
if (ret) {
|
||||
dev_err(adev->dev,
|
||||
"XGMI: Failed to get hive id\n");
|
||||
return ret;
|
||||
ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id);
|
||||
if (ret) {
|
||||
dev_err(adev->dev,
|
||||
"XGMI: Failed to get node id\n");
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
adev->gmc.xgmi.hive_id = 16;
|
||||
adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16;
|
||||
}
|
||||
|
||||
hive = amdgpu_get_xgmi_hive(adev, 1);
|
||||
|
@ -332,29 +371,32 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
|||
top_info->num_nodes = count;
|
||||
hive->number_devices = count;
|
||||
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
/* update node list for other device in the hive */
|
||||
if (tmp_adev != adev) {
|
||||
top_info = &tmp_adev->psp.xgmi_context.top_info;
|
||||
top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id;
|
||||
top_info->num_nodes = count;
|
||||
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
/* update node list for other device in the hive */
|
||||
if (tmp_adev != adev) {
|
||||
top_info = &tmp_adev->psp.xgmi_context.top_info;
|
||||
top_info->nodes[count - 1].node_id =
|
||||
adev->gmc.xgmi.node_id;
|
||||
top_info->num_nodes = count;
|
||||
}
|
||||
ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
||||
if (ret)
|
||||
goto exit;
|
||||
}
|
||||
ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
||||
if (ret)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* get latest topology info for each device from psp */
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
|
||||
&tmp_adev->psp.xgmi_context.top_info);
|
||||
if (ret) {
|
||||
dev_err(tmp_adev->dev,
|
||||
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
|
||||
tmp_adev->gmc.xgmi.node_id,
|
||||
tmp_adev->gmc.xgmi.hive_id, ret);
|
||||
/* To do : continue with some node failed or disable the whole hive */
|
||||
goto exit;
|
||||
/* get latest topology info for each device from psp */
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
|
||||
&tmp_adev->psp.xgmi_context.top_info);
|
||||
if (ret) {
|
||||
dev_err(tmp_adev->dev,
|
||||
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
|
||||
tmp_adev->gmc.xgmi.node_id,
|
||||
tmp_adev->gmc.xgmi.hive_id, ret);
|
||||
/* To do : continue with some node failed or disable the whole hive */
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include "amdgpu.h"
|
||||
#include "soc15.h"
|
||||
|
||||
#include "soc15_common.h"
|
||||
#include "soc15_hw_ip.h"
|
||||
#include "arct_ip_offset.h"
|
||||
|
||||
int arct_reg_base_init(struct amdgpu_device *adev)
|
||||
{
|
||||
/* HW has more IP blocks; only initialize the blocks needed by our driver */
|
||||
uint32_t i;
|
||||
for (i = 0 ; i < MAX_INSTANCE ; ++i) {
|
||||
adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
|
||||
adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
|
||||
adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
|
||||
adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
|
||||
adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i]));
|
||||
adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
|
||||
adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
|
||||
adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
|
||||
adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
|
||||
adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
|
||||
adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
|
||||
adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
|
||||
adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i]));
|
||||
adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i]));
|
||||
adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i]));
|
||||
adev->reg_offset[SDMA5_HWIP][i] = (uint32_t *)(&(SDMA5_BASE.instance[i]));
|
||||
adev->reg_offset[SDMA6_HWIP][i] = (uint32_t *)(&(SDMA6_BASE.instance[i]));
|
||||
adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i]));
|
||||
adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
|
||||
adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
|
||||
}
|
||||
return 0;
|
||||
}
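arct_reg_base_init() simply points each IP block's reg_offset entry at a static table of base addresses generated from the hardware headers. A simplified stand-alone mimic (the ip_base layout and the base values are made up for illustration):

#include <stdint.h>
#include <stdio.h>

#define MAX_INSTANCE 2

enum { GC_HWIP, HDP_HWIP, MMHUB_HWIP, MAX_HWIP };

/* stand-in for the generated *_BASE tables; values are made up */
struct ip_base { uint32_t instance[MAX_INSTANCE][8]; };

static const struct ip_base GC_BASE    = { { { 0x1260, 0xa000 } } };
static const struct ip_base HDP_BASE   = { { { 0x0f20 } } };
static const struct ip_base MMHUB_BASE = { { { 0x1a00 } } };

int main(void)
{
    const uint32_t *reg_offset[MAX_HWIP][MAX_INSTANCE];
    int i;

    for (i = 0; i < MAX_INSTANCE; i++) {
        reg_offset[GC_HWIP][i]    = GC_BASE.instance[i];
        reg_offset[HDP_HWIP][i]   = HDP_BASE.instance[i];
        reg_offset[MMHUB_HWIP][i] = MMHUB_BASE.instance[i];
    }

    printf("GC instance 0, segment 0 base: 0x%x\n",
           (unsigned)reg_offset[GC_HWIP][0][0]);
    return 0;
}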
|
||||
|
||||
|
|
@ -74,6 +74,7 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
|
|||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
athub_v2_0_update_medium_grain_clock_gating(adev,
|
||||
state == AMD_CG_STATE_GATE ? true : false);
|
||||
athub_v2_0_update_medium_grain_light_sleep(adev,
|
||||
|
|
|
@ -1291,6 +1291,12 @@ static int cik_asic_reset(struct amdgpu_device *adev)
|
|||
return r;
|
||||
}
|
||||
|
||||
static enum amd_reset_method
|
||||
cik_asic_reset_method(struct amdgpu_device *adev)
|
||||
{
|
||||
return AMD_RESET_METHOD_LEGACY;
|
||||
}
|
||||
|
||||
static u32 cik_get_config_memsize(struct amdgpu_device *adev)
|
||||
{
|
||||
return RREG32(mmCONFIG_MEMSIZE);
|
||||
|
@ -1823,6 +1829,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
|
|||
.read_bios_from_rom = &cik_read_bios_from_rom,
|
||||
.read_register = &cik_read_register,
|
||||
.reset = &cik_asic_reset,
|
||||
.reset_method = &cik_asic_reset_method,
|
||||
.set_vga_state = &cik_vga_set_state,
|
||||
.get_xclk = &cik_get_xclk,
|
||||
.set_uvd_clocks = &cik_set_uvd_clocks,
|
||||
|
|
|
@ -457,7 +457,10 @@ static int dce_virtual_hw_init(void *handle)
|
|||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_ARCTURUS:
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
break;
|
||||
default:
|
||||
DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);
|
||||
|
|
|
@ -93,6 +93,96 @@ const struct attribute_group *df_v3_6_attr_groups[] = {
|
|||
NULL
|
||||
};
|
||||
|
||||
static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
|
||||
uint32_t ficaa_val)
|
||||
{
|
||||
unsigned long flags, address, data;
|
||||
uint32_t ficadl_val, ficadh_val;
|
||||
|
||||
address = adev->nbio_funcs->get_pcie_index_offset(adev);
|
||||
data = adev->nbio_funcs->get_pcie_data_offset(adev);
|
||||
|
||||
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
|
||||
WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
|
||||
WREG32(data, ficaa_val);
|
||||
|
||||
WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
|
||||
ficadl_val = RREG32(data);
|
||||
|
||||
WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
|
||||
ficadh_val = RREG32(data);
|
||||
|
||||
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
|
||||
|
||||
return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val);
|
||||
}
|
||||
|
||||
static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
|
||||
uint32_t ficadl_val, uint32_t ficadh_val)
|
||||
{
|
||||
unsigned long flags, address, data;
|
||||
|
||||
address = adev->nbio_funcs->get_pcie_index_offset(adev);
|
||||
data = adev->nbio_funcs->get_pcie_data_offset(adev);
|
||||
|
||||
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
|
||||
WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
|
||||
WREG32(data, ficaa_val);
|
||||
|
||||
WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
|
||||
WREG32(data, ficadl_val);
|
||||
|
||||
WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
|
||||
WREG32(data, ficadh_val);
|
||||
|
||||
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* df_v3_6_perfmon_rreg - read perfmon lo and hi
|
||||
*
|
||||
* must be atomic. no mmio method is provided, so the back-to-back reads of lo
* and hi must stay uninterrupted to preserve the DF finite state machine
|
||||
*/
|
||||
static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
|
||||
uint32_t lo_addr, uint32_t *lo_val,
|
||||
uint32_t hi_addr, uint32_t *hi_val)
|
||||
{
|
||||
unsigned long flags, address, data;
|
||||
|
||||
address = adev->nbio_funcs->get_pcie_index_offset(adev);
|
||||
data = adev->nbio_funcs->get_pcie_data_offset(adev);
|
||||
|
||||
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
|
||||
WREG32(address, lo_addr);
|
||||
*lo_val = RREG32(data);
|
||||
WREG32(address, hi_addr);
|
||||
*hi_val = RREG32(data);
|
||||
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* df_v3_6_perfmon_wreg - write to perfmon lo and hi
|
||||
*
|
||||
* must be atomic. no mmio method is provided, so no reads may occur between the
* data writes, in order to preserve the data fabric's finite state machine.
|
||||
*/
|
||||
static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
|
||||
uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val)
|
||||
{
|
||||
unsigned long flags, address, data;
|
||||
|
||||
address = adev->nbio_funcs->get_pcie_index_offset(adev);
|
||||
data = adev->nbio_funcs->get_pcie_data_offset(adev);
|
||||
|
||||
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
|
||||
WREG32(address, lo_addr);
|
||||
WREG32(data, lo_val);
|
||||
WREG32(address, hi_addr);
|
||||
WREG32(data, hi_val);
|
||||
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
|
||||
}
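Both helpers use index/data indirection: write the target address to the PCIE index register, then move data through the data register, all under pcie_idx_lock so nothing can disturb the index mid-sequence. A stand-alone mimic of the write side (the backing array only simulates the indirect register file):

#include <stdint.h>
#include <stdio.h>

static uint32_t backing[256];   /* simulated indirect register file */
static uint32_t index_reg;

static void wreg(int is_index, uint32_t v)
{
    if (is_index)
        index_reg = v;              /* select the target register */
    else
        backing[index_reg % 256] = v;   /* write through the data port */
}

static uint32_t rreg(void)
{
    return backing[index_reg % 256];
}

/* the lo/hi pair is written back to back; in the driver this whole
 * sequence sits under pcie_idx_lock so the index cannot change mid-way */
static void perfmon_wreg(uint32_t lo_addr, uint32_t lo_val,
                         uint32_t hi_addr, uint32_t hi_val)
{
    wreg(1, lo_addr);
    wreg(0, lo_val);
    wreg(1, hi_addr);
    wreg(0, hi_val);
}

int main(void)
{
    perfmon_wreg(0x10, 0xdead, 0x11, 0xbeef);
    wreg(1, 0x10); printf("lo=0x%x\n", rreg());
    wreg(1, 0x11); printf("hi=0x%x\n", rreg());
    return 0;
}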
|
||||
|
||||
/* get the number of df counters available */
|
||||
static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
|
@ -268,6 +358,10 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
|
|||
uint32_t *lo_val,
|
||||
uint32_t *hi_val)
|
||||
{
|
||||
|
||||
uint32_t eventsel, instance, unitmask;
|
||||
uint32_t instance_10, instance_5432, instance_76;
|
||||
|
||||
df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);
|
||||
|
||||
if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
|
||||
|
@ -276,40 +370,33 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
|
|||
return -ENXIO;
|
||||
}
|
||||
|
||||
if (lo_val && hi_val) {
|
||||
uint32_t eventsel, instance, unitmask;
|
||||
uint32_t instance_10, instance_5432, instance_76;
|
||||
eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
|
||||
unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
|
||||
instance = DF_V3_6_GET_INSTANCE(config);
|
||||
|
||||
eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
|
||||
unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
|
||||
instance = DF_V3_6_GET_INSTANCE(config);
|
||||
instance_10 = instance & 0x3;
|
||||
instance_5432 = (instance >> 2) & 0xf;
|
||||
instance_76 = (instance >> 6) & 0x3;
|
||||
|
||||
instance_10 = instance & 0x3;
|
||||
instance_5432 = (instance >> 2) & 0xf;
|
||||
instance_76 = (instance >> 6) & 0x3;
|
||||
*lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22);
|
||||
*hi_val = (instance_76 << 29) | instance_5432;
|
||||
|
||||
*lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel;
|
||||
*hi_val = (instance_76 << 29) | instance_5432;
|
||||
}
|
||||
DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
|
||||
config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* assign df performance counters for read */
|
||||
static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
|
||||
uint64_t config,
|
||||
int *is_assigned)
|
||||
/* add df performance counters for read */
|
||||
static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
|
||||
uint64_t config)
|
||||
{
|
||||
int i, target_cntr;
|
||||
|
||||
*is_assigned = 0;
|
||||
|
||||
target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
|
||||
|
||||
if (target_cntr >= 0) {
|
||||
*is_assigned = 1;
|
||||
if (target_cntr >= 0)
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
|
||||
if (adev->df_perfmon_config_assign_mask[i] == 0U) {
|
||||
|
@ -344,45 +431,13 @@ static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
|
|||
if ((lo_base_addr == 0) || (hi_base_addr == 0))
|
||||
return;
|
||||
|
||||
WREG32_PCIE(lo_base_addr, 0UL);
|
||||
WREG32_PCIE(hi_base_addr, 0UL);
|
||||
}
|
||||
|
||||
|
||||
static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev,
|
||||
uint64_t config)
|
||||
{
|
||||
uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
|
||||
int ret, is_assigned;
|
||||
|
||||
ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned);
|
||||
|
||||
if (ret || is_assigned)
|
||||
return ret;
|
||||
|
||||
ret = df_v3_6_pmc_get_ctrl_settings(adev,
|
||||
config,
|
||||
&lo_base_addr,
|
||||
&hi_base_addr,
|
||||
&lo_val,
|
||||
&hi_val);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
|
||||
config, lo_base_addr, hi_base_addr, lo_val, hi_val);
|
||||
|
||||
WREG32_PCIE(lo_base_addr, lo_val);
|
||||
WREG32_PCIE(hi_base_addr, hi_val);
|
||||
|
||||
return ret;
|
||||
df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
|
||||
}
|
||||
|
||||
static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
|
||||
int is_enable)
|
||||
{
|
||||
uint32_t lo_base_addr, hi_base_addr, lo_val;
|
||||
uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
|
||||
int ret = 0;
|
||||
|
||||
switch (adev->asic_type) {
|
||||
|
@ -391,24 +446,20 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
|
|||
df_v3_6_reset_perfmon_cntr(adev, config);
|
||||
|
||||
if (is_enable) {
|
||||
ret = df_v3_6_add_perfmon_cntr(adev, config);
|
||||
ret = df_v3_6_pmc_add_cntr(adev, config);
|
||||
} else {
|
||||
ret = df_v3_6_pmc_get_ctrl_settings(adev,
|
||||
config,
|
||||
&lo_base_addr,
|
||||
&hi_base_addr,
|
||||
NULL,
|
||||
NULL);
|
||||
&lo_val,
|
||||
&hi_val);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
lo_val = RREG32_PCIE(lo_base_addr);
|
||||
|
||||
DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x",
|
||||
config, lo_base_addr, hi_base_addr, lo_val);
|
||||
|
||||
WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22));
|
||||
df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val,
|
||||
hi_base_addr, hi_val);
|
||||
}
|
||||
|
||||
break;
|
||||
|
@ -422,7 +473,7 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
|
|||
static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
|
||||
int is_disable)
|
||||
{
|
||||
uint32_t lo_base_addr, hi_base_addr, lo_val;
|
||||
uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
|
||||
int ret = 0;
|
||||
|
||||
switch (adev->asic_type) {
|
||||
|
@ -431,18 +482,13 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
|
|||
config,
|
||||
&lo_base_addr,
|
||||
&hi_base_addr,
|
||||
NULL,
|
||||
NULL);
|
||||
&lo_val,
|
||||
&hi_val);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
lo_val = RREG32_PCIE(lo_base_addr);
|
||||
|
||||
DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x",
|
||||
config, lo_base_addr, hi_base_addr, lo_val);
|
||||
|
||||
WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));
|
||||
df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
|
||||
|
||||
if (is_disable)
|
||||
df_v3_6_pmc_release_cntr(adev, config);
|
||||
|
@ -471,8 +517,8 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
|
|||
if ((lo_base_addr == 0) || (hi_base_addr == 0))
|
||||
return;
|
||||
|
||||
lo_val = RREG32_PCIE(lo_base_addr);
|
||||
hi_val = RREG32_PCIE(hi_base_addr);
|
||||
df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
|
||||
hi_base_addr, &hi_val);
|
||||
|
||||
*count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
|
||||
|
||||
|
@ -480,7 +526,7 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
|
|||
*count = 0;
|
||||
|
||||
DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
|
||||
config, lo_base_addr, hi_base_addr, lo_val, hi_val);
|
||||
config, lo_base_addr, hi_base_addr, lo_val, hi_val);
|
||||
|
||||
break;
|
||||
|
||||
|
@ -499,5 +545,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
|
|||
.get_clockgating_state = df_v3_6_get_clockgating_state,
|
||||
.pmc_start = df_v3_6_pmc_start,
|
||||
.pmc_stop = df_v3_6_pmc_stop,
|
||||
.pmc_get_count = df_v3_6_pmc_get_count
|
||||
.pmc_get_count = df_v3_6_pmc_get_count,
|
||||
.get_fica = df_v3_6_get_fica,
|
||||
.set_fica = df_v3_6_set_fica
|
||||
};
|
||||
|
|
|
@ -20,8 +20,12 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/delay.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/firmware.h>
|
||||
#include <drm/drmP.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
#include "amdgpu_psp.h"
|
||||
|
@ -56,6 +60,9 @@
|
|||
#define F32_CE_PROGRAM_RAM_SIZE 65536
|
||||
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
|
||||
|
||||
#define mmCGTT_GS_NGG_CLK_CTRL 0x5087
|
||||
#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi10_me.bin");
|
||||
|
@ -63,6 +70,20 @@ MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
|
|||
MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/navi14_ce.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi14_pfp.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi14_me.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi14_mec.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi14_mec2.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi14_rlc.bin");
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/navi12_ce.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi12_pfp.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi12_me.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi12_mec.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi12_mec2.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi12_rlc.bin");
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_gc_10_1[] =
|
||||
{
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
|
||||
|
@ -109,6 +130,99 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
|
|||
/* Pending on emulation bring up */
|
||||
};
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
|
||||
{
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000),
|
||||
};
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
|
||||
{
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0xc0000100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
|
||||
};
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
|
||||
{
|
||||
/* Pending on emulation bring up */
|
||||
};
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] =
|
||||
{
|
||||
/* Pending on emulation bring up */
|
||||
};
|
||||
|
||||
#define DEFAULT_SH_MEM_CONFIG \
|
||||
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
|
||||
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
|
||||
|
@ -250,6 +364,22 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
|
|||
golden_settings_gc_10_0_nv10,
|
||||
(const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
|
||||
break;
|
||||
case CHIP_NAVI14:
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_gc_10_1_1,
|
||||
(const u32)ARRAY_SIZE(golden_settings_gc_10_1_1));
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_gc_10_1_nv14,
|
||||
(const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14));
|
||||
break;
|
||||
case CHIP_NAVI12:
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_gc_10_1_2,
|
||||
(const u32)ARRAY_SIZE(golden_settings_gc_10_1_2));
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_gc_10_1_2_nv12,
|
||||
(const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -331,7 +461,7 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
|
|||
if (amdgpu_emu_mode == 1)
|
||||
msleep(1);
|
||||
else
|
||||
DRM_UDELAY(1);
|
||||
udelay(1);
|
||||
}
|
||||
if (i < adev->usec_timeout) {
|
||||
if (amdgpu_emu_mode == 1)
|
||||
|
@ -481,6 +611,12 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
|
|||
case CHIP_NAVI10:
|
||||
chip_name = "navi10";
|
||||
break;
|
||||
case CHIP_NAVI14:
|
||||
chip_name = "navi14";
|
||||
break;
|
||||
case CHIP_NAVI12:
|
||||
chip_name = "navi12";
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
@ -1026,6 +1162,8 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
|
|||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
adev->gfx.config.max_hw_contexts = 8;
|
||||
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
|
||||
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
|
||||
|
@ -1133,6 +1271,8 @@ static int gfx_v10_0_sw_init(void *handle)
|
|||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
adev->gfx.me.num_me = 1;
|
||||
adev->gfx.me.num_pipe_per_me = 2;
|
||||
adev->gfx.me.num_queue_per_pipe = 1;
|
||||
|
@ -1452,6 +1592,25 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
|
|||
}
|
||||
}
|
||||
|
||||
static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
|
||||
{
|
||||
int vmid;
|
||||
|
||||
/*
|
||||
* Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
|
||||
* access. Compute VMIDs should be enabled by FW for target VMIDs;
* the driver can enable them for graphics. VMID0 should maintain
|
||||
* access so that HWS firmware can save/restore entries.
|
||||
*/
|
||||
for (vmid = 1; vmid < 16; vmid++) {
|
||||
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
|
||||
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
|
||||
WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
|
||||
WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
|
||||
{
|
||||
int i, j, k;
|
||||
|
@ -1461,7 +1620,8 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
|
|||
u32 utcl_invreq_disable = 0;
|
||||
/*
|
||||
* GCRD_TARGETS_DISABLE field contains
|
||||
* for Navi10: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
|
||||
* for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
|
||||
* for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0]
|
||||
*/
|
||||
u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask(
|
||||
2 * max_wgp_per_sh + /* TCP */
|
||||
|
@ -1469,7 +1629,8 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
|
|||
4); /* GL1C */
|
||||
/*
|
||||
* UTCL1_UTCL0_INVREQ_DISABLE field contains
|
||||
* for Navi10: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
|
||||
* for Navi10Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
|
||||
* for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0]
|
||||
*/
|
||||
u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask(
|
||||
2 * max_wgp_per_sh + /* TCP */
|
||||
|
@ -1477,7 +1638,9 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
|
|||
4 + /* RMI */
|
||||
1); /* SQG */
|
||||
|
||||
if (adev->asic_type == CHIP_NAVI10) {
|
||||
if (adev->asic_type == CHIP_NAVI10 ||
|
||||
adev->asic_type == CHIP_NAVI14 ||
|
||||
adev->asic_type == CHIP_NAVI12) {
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
|
||||
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
|
||||
|
@ -1535,7 +1698,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
|
|||
/* XXX SH_MEM regs */
|
||||
/* where to put LDS, scratch, GPUVM in FSA64 space */
|
||||
mutex_lock(&adev->srbm_mutex);
|
||||
for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
|
||||
for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
|
||||
nv_grbm_select(adev, 0, 0, 0, i);
|
||||
/* CP and shaders */
|
||||
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
|
||||
|
@ -1552,6 +1715,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
|
|||
mutex_unlock(&adev->srbm_mutex);
|
||||
|
||||
gfx_v10_0_init_compute_vmid(adev);
|
||||
gfx_v10_0_init_gds_vmid(adev);
|
||||
|
||||
}
|
||||
|
||||
|
@ -1624,9 +1788,9 @@ static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
|
|||
* hence no handshake between SMU & RLC
|
||||
* GFXOFF will be disabled
|
||||
*/
|
||||
rlc_pg_cntl |= 0x80000;
|
||||
rlc_pg_cntl |= 0x800000;
|
||||
} else
|
||||
rlc_pg_cntl &= ~0x80000;
|
||||
rlc_pg_cntl &= ~0x800000;
|
||||
WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl);
|
||||
}
|
||||
|
||||
|
@ -4037,6 +4201,7 @@ static int gfx_v10_0_set_powergating_state(void *handle,
|
|||
bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
if (!enable) {
|
||||
amdgpu_gfx_off_ctrl(adev, false);
|
||||
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
|
||||
|
@ -4056,6 +4221,8 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
|
|||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
gfx_v10_0_update_gfx_clock_gating(adev,
|
||||
state == AMD_CG_STATE_GATE ? true : false);
|
||||
break;
|
||||
|
@ -4462,7 +4629,7 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
|
|||
if (ring->trail_seq ==
|
||||
le32_to_cpu(*(ring->trail_fence_cpu_addr)))
|
||||
break;
|
||||
DRM_UDELAY(1);
|
||||
udelay(1);
|
||||
}
|
||||
|
||||
if (i >= adev->usec_timeout) {
|
||||
|
@ -4936,7 +5103,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
|
|||
.align_mask = 0xff,
|
||||
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
|
||||
.support_64bit_ptrs = true,
|
||||
.vmhub = AMDGPU_GFXHUB,
|
||||
.vmhub = AMDGPU_GFXHUB_0,
|
||||
.get_rptr = gfx_v10_0_ring_get_rptr_gfx,
|
||||
.get_wptr = gfx_v10_0_ring_get_wptr_gfx,
|
||||
.set_wptr = gfx_v10_0_ring_set_wptr_gfx,
|
||||
|
@ -4987,7 +5154,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
|
|||
.align_mask = 0xff,
|
||||
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
|
||||
.support_64bit_ptrs = true,
|
||||
.vmhub = AMDGPU_GFXHUB,
|
||||
.vmhub = AMDGPU_GFXHUB_0,
|
||||
.get_rptr = gfx_v10_0_ring_get_rptr_compute,
|
||||
.get_wptr = gfx_v10_0_ring_get_wptr_compute,
|
||||
.set_wptr = gfx_v10_0_ring_set_wptr_compute,
|
||||
|
@ -5020,7 +5187,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
|
|||
.align_mask = 0xff,
|
||||
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
|
||||
.support_64bit_ptrs = true,
|
||||
.vmhub = AMDGPU_GFXHUB,
|
||||
.vmhub = AMDGPU_GFXHUB_0,
|
||||
.get_rptr = gfx_v10_0_ring_get_rptr_compute,
|
||||
.get_wptr = gfx_v10_0_ring_get_wptr_compute,
|
||||
.set_wptr = gfx_v10_0_ring_set_wptr_compute,
|
||||
|
@ -5097,6 +5264,8 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
|
|||
{
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -1890,6 +1890,24 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
|
|||
}
|
||||
}
|
||||
|
||||
static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev)
|
||||
{
|
||||
int vmid;
|
||||
|
||||
/*
|
||||
* Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
|
||||
* access. Compute VMIDs should be enabled by FW for target VMIDs,
|
||||
* the driver can enable them for graphics. VMID0 should maintain
|
||||
* access so that HWS firmware can save/restore entries.
|
||||
*/
|
||||
for (vmid = 1; vmid < 16; vmid++) {
|
||||
WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
|
||||
WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
|
||||
WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
|
||||
WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void gfx_v7_0_config_init(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->gfx.config.double_offchip_lds_buf = 1;
|
||||
|
@ -1968,6 +1986,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
|
|||
mutex_unlock(&adev->srbm_mutex);
|
||||
|
||||
gfx_v7_0_init_compute_vmid(adev);
|
||||
gfx_v7_0_init_gds_vmid(adev);
|
||||
|
||||
WREG32(mmSX_DEBUG_1, 0x20);
|
||||
|
||||
|
|
|
@ -1321,6 +1321,39 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
|
||||
if (unlikely(r != 0))
|
||||
return r;
|
||||
|
||||
r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
|
||||
AMDGPU_GEM_DOMAIN_VRAM);
|
||||
if (!r)
|
||||
adev->gfx.rlc.clear_state_gpu_addr =
|
||||
amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
|
||||
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (!adev->gfx.rlc.clear_state_obj)
|
||||
return;
|
||||
|
||||
r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
|
||||
if (likely(r == 0)) {
|
||||
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
|
||||
}
|
||||
}
|
||||
|
||||
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
|
||||
|
@ -3717,6 +3750,24 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
|
|||
}
|
||||
}
|
||||
|
||||
static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
|
||||
{
|
||||
int vmid;
|
||||
|
||||
/*
|
||||
* Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
|
||||
* access. Compute VMIDs should be enabled by FW for target VMIDs,
|
||||
* the driver can enable them for graphics. VMID0 should maintain
|
||||
* access so that HWS firmware can save/restore entries.
|
||||
*/
|
||||
for (vmid = 1; vmid < 16; vmid++) {
|
||||
WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
|
||||
WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
|
||||
WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
|
||||
WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void gfx_v8_0_config_init(struct amdgpu_device *adev)
|
||||
{
|
||||
switch (adev->asic_type) {
|
||||
|
@ -3783,6 +3834,7 @@ static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
|
|||
mutex_unlock(&adev->srbm_mutex);
|
||||
|
||||
gfx_v8_0_init_compute_vmid(adev);
|
||||
gfx_v8_0_init_gds_vmid(adev);
|
||||
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
/*
|
||||
|
@ -4785,6 +4837,10 @@ static int gfx_v8_0_hw_init(void *handle)
|
|||
gfx_v8_0_init_golden_registers(adev);
|
||||
gfx_v8_0_constants_init(adev);
|
||||
|
||||
r = gfx_v8_0_csb_vram_pin(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = adev->gfx.rlc.funcs->resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
@ -4901,6 +4957,9 @@ static int gfx_v8_0_hw_fini(void *handle)
|
|||
else
|
||||
pr_err("rlc is busy, skip halt rlc\n");
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
|
||||
gfx_v8_0_csb_vram_unpin(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
|
@ -357,7 +357,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
|
|||
|
||||
void gfxhub_v1_0_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB];
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
|
||||
|
||||
hub->ctx0_ptb_addr_lo32 =
|
||||
SOC15_REG_OFFSET(GC, 0,
|
||||
|
|
|
@ -333,7 +333,7 @@ void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev,
|
|||
|
||||
void gfxhub_v2_0_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB];
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
|
||||
|
||||
hub->ctx0_ptb_addr_lo32 =
|
||||
SOC15_REG_OFFSET(GC, 0,
|
||||
|
|
|
@ -62,7 +62,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
|
|||
struct amdgpu_vmhub *hub;
|
||||
u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i;
|
||||
|
||||
bits[AMDGPU_GFXHUB] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
|
||||
bits[AMDGPU_GFXHUB_0] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
|
||||
GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
|
||||
GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
|
||||
GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
|
||||
|
@ -70,7 +70,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
|
|||
GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
|
||||
GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
|
||||
|
||||
bits[AMDGPU_MMHUB] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
|
||||
bits[AMDGPU_MMHUB_0] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
|
||||
MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
|
||||
MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
|
||||
MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
|
||||
|
@ -81,39 +81,39 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
|
|||
switch (state) {
|
||||
case AMDGPU_IRQ_STATE_DISABLE:
|
||||
/* MM HUB */
|
||||
hub = &adev->vmhub[AMDGPU_MMHUB];
|
||||
hub = &adev->vmhub[AMDGPU_MMHUB_0];
|
||||
for (i = 0; i < 16; i++) {
|
||||
reg = hub->vm_context0_cntl + i;
|
||||
tmp = RREG32(reg);
|
||||
tmp &= ~bits[AMDGPU_MMHUB];
|
||||
tmp &= ~bits[AMDGPU_MMHUB_0];
|
||||
WREG32(reg, tmp);
|
||||
}
|
||||
|
||||
/* GFX HUB */
|
||||
hub = &adev->vmhub[AMDGPU_GFXHUB];
|
||||
hub = &adev->vmhub[AMDGPU_GFXHUB_0];
|
||||
for (i = 0; i < 16; i++) {
|
||||
reg = hub->vm_context0_cntl + i;
|
||||
tmp = RREG32(reg);
|
||||
tmp &= ~bits[AMDGPU_GFXHUB];
|
||||
tmp &= ~bits[AMDGPU_GFXHUB_0];
|
||||
WREG32(reg, tmp);
|
||||
}
|
||||
break;
|
||||
case AMDGPU_IRQ_STATE_ENABLE:
|
||||
/* MM HUB */
|
||||
hub = &adev->vmhub[AMDGPU_MMHUB];
|
||||
hub = &adev->vmhub[AMDGPU_MMHUB_0];
|
||||
for (i = 0; i < 16; i++) {
|
||||
reg = hub->vm_context0_cntl + i;
|
||||
tmp = RREG32(reg);
|
||||
tmp |= bits[AMDGPU_MMHUB];
|
||||
tmp |= bits[AMDGPU_MMHUB_0];
|
||||
WREG32(reg, tmp);
|
||||
}
|
||||
|
||||
/* GFX HUB */
|
||||
hub = &adev->vmhub[AMDGPU_GFXHUB];
|
||||
hub = &adev->vmhub[AMDGPU_GFXHUB_0];
|
||||
for (i = 0; i < 16; i++) {
|
||||
reg = hub->vm_context0_cntl + i;
|
||||
tmp = RREG32(reg);
|
||||
tmp |= bits[AMDGPU_GFXHUB];
|
||||
tmp |= bits[AMDGPU_GFXHUB_0];
|
||||
WREG32(reg, tmp);
|
||||
}
|
||||
break;
|
||||
|
@ -244,11 +244,11 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
|
|||
|
||||
mutex_lock(&adev->mman.gtt_window_lock);
|
||||
|
||||
gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB, 0);
|
||||
gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
|
||||
if (!adev->mman.buffer_funcs_enabled ||
|
||||
!adev->ib_pool_ready ||
|
||||
adev->in_gpu_reset) {
|
||||
gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB, 0);
|
||||
gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
|
||||
mutex_unlock(&adev->mman.gtt_window_lock);
|
||||
return;
|
||||
}
|
||||
|
@ -313,7 +313,7 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
|
|||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t reg;
|
||||
|
||||
if (ring->funcs->vmhub == AMDGPU_GFXHUB)
|
||||
if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
|
||||
else
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
|
||||
|
@ -524,6 +524,8 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
|
|||
if (amdgpu_gart_size == -1) {
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
default:
|
||||
adev->gmc.gart_size = 512ULL << 20;
|
||||
break;
|
||||
|
@ -601,9 +603,12 @@ static int gmc_v10_0_sw_init(void *handle)
|
|||
adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
adev->num_vmhubs = 2;
|
||||
/*
|
||||
* To fulfill 4-level page support,
|
||||
* vm size is 256TB (48bit), maximum size of Navi10,
|
||||
* vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12,
|
||||
* block size 512 (9bit)
|
||||
*/
|
||||
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
|
||||
|
@ -680,8 +685,8 @@ static int gmc_v10_0_sw_init(void *handle)
|
|||
* amdgpu graphics/compute will use VMIDs 1-7
|
||||
* amdkfd will use VMIDs 8-15
|
||||
*/
|
||||
adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
|
||||
adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
|
||||
adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
|
||||
adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
|
||||
|
||||
amdgpu_vm_manager_init(adev);
|
||||
|
||||
|
@ -717,6 +722,8 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
|
|||
{
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
|
|
@ -48,6 +48,8 @@
|
|||
#include "gfxhub_v1_0.h"
|
||||
#include "mmhub_v1_0.h"
|
||||
#include "gfxhub_v1_1.h"
|
||||
#include "mmhub_v9_4.h"
|
||||
#include "umc_v6_1.h"
|
||||
|
||||
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
|
||||
|
||||
|
@ -241,11 +243,23 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
|
|||
}
|
||||
|
||||
static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
|
||||
struct ras_err_data *err_data,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
|
||||
amdgpu_ras_reset_gpu(adev, 0);
|
||||
return AMDGPU_RAS_UE;
|
||||
if (adev->umc.funcs->query_ras_error_count)
|
||||
adev->umc.funcs->query_ras_error_count(adev, err_data);
|
||||
/* umc query_ras_error_address is also responsible for clearing
|
||||
* error status
|
||||
*/
|
||||
if (adev->umc.funcs->query_ras_error_address)
|
||||
adev->umc.funcs->query_ras_error_address(adev, err_data);
|
||||
|
||||
/* only uncorrectable error needs gpu reset */
|
||||
if (err_data->ue_count)
|
||||
amdgpu_ras_reset_gpu(adev, 0);
|
||||
|
||||
return AMDGPU_RAS_SUCCESS;
|
||||
}
|
||||
|
||||
static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev,
|
||||
|
@ -284,7 +298,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
|
|||
|
||||
switch (state) {
|
||||
case AMDGPU_IRQ_STATE_DISABLE:
|
||||
for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) {
|
||||
for (j = 0; j < adev->num_vmhubs; j++) {
|
||||
hub = &adev->vmhub[j];
|
||||
for (i = 0; i < 16; i++) {
|
||||
reg = hub->vm_context0_cntl + i;
|
||||
|
@ -295,7 +309,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
|
|||
}
|
||||
break;
|
||||
case AMDGPU_IRQ_STATE_ENABLE:
|
||||
for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) {
|
||||
for (j = 0; j < adev->num_vmhubs; j++) {
|
||||
hub = &adev->vmhub[j];
|
||||
for (i = 0; i < 16; i++) {
|
||||
reg = hub->vm_context0_cntl + i;
|
||||
|
@ -315,10 +329,11 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
|
|||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
|
||||
struct amdgpu_vmhub *hub;
|
||||
bool retry_fault = !!(entry->src_data[1] & 0x80);
|
||||
uint32_t status = 0;
|
||||
u64 addr;
|
||||
char hub_name[10];
|
||||
|
||||
addr = (u64)entry->src_data[0] << 12;
|
||||
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
|
||||
|
@ -327,6 +342,17 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
|
|||
entry->timestamp))
|
||||
return 1; /* This also prevents sending it to KFD */
|
||||
|
||||
if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
|
||||
snprintf(hub_name, sizeof(hub_name), "mmhub0");
|
||||
hub = &adev->vmhub[AMDGPU_MMHUB_0];
|
||||
} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
|
||||
snprintf(hub_name, sizeof(hub_name), "mmhub1");
|
||||
hub = &adev->vmhub[AMDGPU_MMHUB_1];
|
||||
} else {
|
||||
snprintf(hub_name, sizeof(hub_name), "gfxhub0");
|
||||
hub = &adev->vmhub[AMDGPU_GFXHUB_0];
|
||||
}
|
||||
|
||||
/* If it's the first fault for this address, process it normally */
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
status = RREG32(hub->vm_l2_pro_fault_status);
|
||||
|
@ -342,17 +368,30 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
|
|||
dev_err(adev->dev,
|
||||
"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
|
||||
"pasid:%u, for process %s pid %d thread %s pid %d)\n",
|
||||
entry->vmid_src ? "mmhub" : "gfxhub",
|
||||
retry_fault ? "retry" : "no-retry",
|
||||
hub_name, retry_fault ? "retry" : "no-retry",
|
||||
entry->src_id, entry->ring_id, entry->vmid,
|
||||
entry->pasid, task_info.process_name, task_info.tgid,
|
||||
task_info.task_name, task_info.pid);
|
||||
dev_err(adev->dev, " in page starting at address 0x%016llx from %d\n",
|
||||
dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
|
||||
addr, entry->client_id);
|
||||
if (!amdgpu_sriov_vf(adev))
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
dev_err(adev->dev,
|
||||
"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
|
||||
status);
|
||||
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
|
||||
REG_GET_FIELD(status,
|
||||
VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
|
||||
dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
|
||||
REG_GET_FIELD(status,
|
||||
VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
|
||||
dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
|
||||
REG_GET_FIELD(status,
|
||||
VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
|
||||
dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
|
||||
REG_GET_FIELD(status,
|
||||
VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -419,7 +458,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
|
|||
const unsigned eng = 17;
|
||||
unsigned i, j;
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
|
||||
for (i = 0; i < adev->num_vmhubs; ++i) {
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[i];
|
||||
u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
|
||||
|
||||
|
@ -480,7 +519,11 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
|
|||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t reg;
|
||||
|
||||
if (ring->funcs->vmhub == AMDGPU_GFXHUB)
|
||||
/* Do nothing because there's no lut register for mmhub1. */
|
||||
if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
|
||||
return;
|
||||
|
||||
if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
|
||||
else
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
|
||||
|
@ -597,12 +640,29 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
|
|||
adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
|
||||
}
|
||||
|
||||
static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA20:
|
||||
adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
|
||||
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
|
||||
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
|
||||
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET;
|
||||
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
|
||||
adev->umc.funcs = &umc_v6_1_funcs;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int gmc_v9_0_early_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
gmc_v9_0_set_gmc_funcs(adev);
|
||||
gmc_v9_0_set_irq_funcs(adev);
|
||||
gmc_v9_0_set_umc_funcs(adev);
|
||||
|
||||
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
|
||||
adev->gmc.shared_aperture_end =
|
||||
|
@ -629,6 +689,7 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
|
|||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_RAVEN:
|
||||
case CHIP_ARCTURUS:
|
||||
return true;
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
|
@ -641,7 +702,8 @@ static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
|
|||
{
|
||||
struct amdgpu_ring *ring;
|
||||
unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
|
||||
{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP};
|
||||
{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
|
||||
GFXHUB_FREE_VM_INV_ENGS_BITMAP};
|
||||
unsigned i;
|
||||
unsigned vmhub, inv_eng;
|
||||
|
||||
|
@ -689,6 +751,7 @@ static int gmc_v9_0_ecc_late_init(void *handle)
|
|||
amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* handle resume path. */
|
||||
if (*ras_if) {
|
||||
/* resend ras TA enable cmd during resume.
|
||||
|
@ -806,8 +869,12 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
|
|||
struct amdgpu_gmc *mc)
|
||||
{
|
||||
u64 base = 0;
|
||||
if (!amdgpu_sriov_vf(adev))
|
||||
base = mmhub_v1_0_get_fb_location(adev);
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
if (adev->asic_type == CHIP_ARCTURUS)
|
||||
base = mmhub_v9_4_get_fb_location(adev);
|
||||
else
|
||||
base = mmhub_v1_0_get_fb_location(adev);
|
||||
}
|
||||
/* add the xgmi offset of the physical node */
|
||||
base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
|
||||
amdgpu_gmc_vram_location(adev, mc, base);
|
||||
|
@ -887,6 +954,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
|
|||
case CHIP_VEGA10: /* all engines support GPUVM */
|
||||
case CHIP_VEGA12: /* all engines support GPUVM */
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_ARCTURUS:
|
||||
default:
|
||||
adev->gmc.gart_size = 512ULL << 20;
|
||||
break;
|
||||
|
@ -923,7 +991,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
|
|||
|
||||
static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
|
||||
u32 d1vga_control;
|
||||
unsigned size;
|
||||
|
||||
/*
|
||||
|
@ -933,6 +1001,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
|
|||
if (gmc_v9_0_keep_stolen_memory(adev))
|
||||
return 9 * 1024 * 1024;
|
||||
|
||||
d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
|
||||
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
|
||||
size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
|
||||
} else {
|
||||
|
@ -972,13 +1041,18 @@ static int gmc_v9_0_sw_init(void *handle)
|
|||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
gfxhub_v1_0_init(adev);
|
||||
mmhub_v1_0_init(adev);
|
||||
if (adev->asic_type == CHIP_ARCTURUS)
|
||||
mmhub_v9_4_init(adev);
|
||||
else
|
||||
mmhub_v1_0_init(adev);
|
||||
|
||||
spin_lock_init(&adev->gmc.invalidate_lock);
|
||||
|
||||
adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_RAVEN:
|
||||
adev->num_vmhubs = 2;
|
||||
|
||||
if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
|
||||
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
|
||||
} else {
|
||||
|
@ -991,6 +1065,8 @@ static int gmc_v9_0_sw_init(void *handle)
|
|||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
adev->num_vmhubs = 2;
|
||||
|
||||
/*
|
||||
* To fulfill 4-level page support,
|
||||
* vm size is 256TB (48bit), maximum size of Vega10,
|
||||
|
@ -1002,6 +1078,12 @@ static int gmc_v9_0_sw_init(void *handle)
|
|||
else
|
||||
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
|
||||
break;
|
||||
case CHIP_ARCTURUS:
|
||||
adev->num_vmhubs = 3;
|
||||
|
||||
/* Keep the vm size same with Vega20 */
|
||||
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -1012,6 +1094,13 @@ static int gmc_v9_0_sw_init(void *handle)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
if (adev->asic_type == CHIP_ARCTURUS) {
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
|
||||
&adev->gmc.vm_fault);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
|
||||
&adev->gmc.vm_fault);
|
||||
|
||||
|
@ -1077,8 +1166,9 @@ static int gmc_v9_0_sw_init(void *handle)
|
|||
* amdgpu graphics/compute will use VMIDs 1-7
|
||||
* amdkfd will use VMIDs 8-15
|
||||
*/
|
||||
adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
|
||||
adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
|
||||
adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
|
||||
adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
|
||||
adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS;
|
||||
|
||||
amdgpu_vm_manager_init(adev);
|
||||
|
||||
|
@ -1123,7 +1213,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
|
|||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA10:
|
||||
if (amdgpu_virt_support_skip_setting(adev))
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
break;
|
||||
/* fall through */
|
||||
case CHIP_VEGA20:
|
||||
|
@ -1181,7 +1271,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
r = mmhub_v1_0_gart_enable(adev);
|
||||
if (adev->asic_type == CHIP_ARCTURUS)
|
||||
r = mmhub_v9_4_gart_enable(adev);
|
||||
else
|
||||
r = mmhub_v1_0_gart_enable(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
@ -1202,7 +1295,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
|
|||
value = true;
|
||||
|
||||
gfxhub_v1_0_set_fault_enable_default(adev, value);
|
||||
mmhub_v1_0_set_fault_enable_default(adev, value);
|
||||
if (adev->asic_type == CHIP_ARCTURUS)
|
||||
mmhub_v9_4_set_fault_enable_default(adev, value);
|
||||
else
|
||||
mmhub_v1_0_set_fault_enable_default(adev, value);
|
||||
gmc_v9_0_flush_gpu_tlb(adev, 0, 0);
|
||||
|
||||
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
|
||||
|
@ -1243,7 +1339,10 @@ static int gmc_v9_0_hw_init(void *handle)
|
|||
static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
|
||||
{
|
||||
gfxhub_v1_0_gart_disable(adev);
|
||||
mmhub_v1_0_gart_disable(adev);
|
||||
if (adev->asic_type == CHIP_ARCTURUS)
|
||||
mmhub_v9_4_gart_disable(adev);
|
||||
else
|
||||
mmhub_v1_0_gart_disable(adev);
|
||||
amdgpu_gart_table_vram_unpin(adev);
|
||||
}
|
||||
|
||||
|
@ -1308,6 +1407,9 @@ static int gmc_v9_0_set_clockgating_state(void *handle,
|
|||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
if (adev->asic_type == CHIP_ARCTURUS)
|
||||
return 0;
|
||||
|
||||
return mmhub_v1_0_set_clockgating(adev, state);
|
||||
}
|
||||
|
||||
|
@ -1315,6 +1417,9 @@ static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
|
|||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
if (adev->asic_type == CHIP_ARCTURUS)
|
||||
return;
|
||||
|
||||
mmhub_v1_0_get_clockgating(adev, flags);
|
||||
}
|
||||
|
||||
|
|
|
@ -37,4 +37,11 @@
|
|||
extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;
|
||||
extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block;
|
||||
|
||||
/* amdgpu_amdkfd*.c */
|
||||
void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
|
||||
uint64_t value);
|
||||
void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
|
||||
uint64_t value);
|
||||
void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid,
|
||||
uint32_t vmid, uint64_t value);
|
||||
#endif
|
||||
|
|
|
@ -111,7 +111,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
|
|||
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
||||
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
|
||||
|
||||
if (amdgpu_virt_support_skip_setting(adev))
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return;
|
||||
|
||||
/* Set default page address. */
|
||||
|
@ -159,7 +159,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
|
|||
{
|
||||
uint32_t tmp;
|
||||
|
||||
if (amdgpu_virt_support_skip_setting(adev))
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return;
|
||||
|
||||
/* Setup L2 cache */
|
||||
|
@ -208,7 +208,7 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
|
|||
|
||||
static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
|
||||
{
|
||||
if (amdgpu_virt_support_skip_setting(adev))
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return;
|
||||
|
||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
|
||||
|
@ -348,7 +348,7 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
|
|||
0);
|
||||
WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||
|
||||
if (!amdgpu_virt_support_skip_setting(adev)) {
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
/* Setup L2 cache */
|
||||
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
|
||||
|
@ -367,7 +367,7 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
|
|||
{
|
||||
u32 tmp;
|
||||
|
||||
if (amdgpu_virt_support_skip_setting(adev))
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return;
|
||||
|
||||
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
|
||||
|
@ -407,7 +407,7 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
|
|||
|
||||
void mmhub_v1_0_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB];
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
|
||||
|
||||
hub->ctx0_ptb_addr_lo32 =
|
||||
SOC15_REG_OFFSET(MMHUB, 0,
|
||||
|
|
|
@ -324,7 +324,7 @@ void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
|
|||
|
||||
void mmhub_v2_0_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB];
|
||||
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
|
||||
|
||||
hub->ctx0_ptb_addr_lo32 =
|
||||
SOC15_REG_OFFSET(MMHUB, 0,
|
||||
|
@ -406,6 +406,7 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
|
|||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
mmhub_v2_0_update_medium_grain_clock_gating(adev,
|
||||
state == AMD_CG_STATE_GATE ? true : false);
|
||||
mmhub_v2_0_update_medium_grain_light_sleep(adev,
|
||||
|
|
|
@ -0,0 +1,517 @@
|
|||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include "amdgpu.h"
|
||||
#include "mmhub_v9_4.h"
|
||||
|
||||
#include "mmhub/mmhub_9_4_1_offset.h"
|
||||
#include "mmhub/mmhub_9_4_1_sh_mask.h"
|
||||
#include "mmhub/mmhub_9_4_1_default.h"
|
||||
#include "athub/athub_1_0_offset.h"
|
||||
#include "athub/athub_1_0_sh_mask.h"
|
||||
#include "vega10_enum.h"
|
||||
|
||||
#include "soc15_common.h"
|
||||
|
||||
#define MMHUB_NUM_INSTANCES 2
|
||||
#define MMHUB_INSTANCE_REGISTER_OFFSET 0x3000
|
||||
|
||||
u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev)
|
||||
{
|
||||
/* The base should be the same between the two MMHUBs on Arcturus. Read one here. */
|
||||
u64 base = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE);
|
||||
u64 top = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP);
|
||||
|
||||
base &= VMSHAREDVC0_MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
|
||||
base <<= 24;
|
||||
|
||||
top &= VMSHAREDVC0_MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
|
||||
top <<= 24;
|
||||
|
||||
adev->gmc.fb_start = base;
|
||||
adev->gmc.fb_end = top;
|
||||
|
||||
return base;
|
||||
}
|
||||
|
||||
void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid,
|
||||
uint32_t vmid, uint64_t value)
|
||||
{
|
||||
/* two registers distance between mmVML2VC0_VM_CONTEXT0_* and
|
||||
* mmVML2VC0_VM_CONTEXT1_*
|
||||
*/
|
||||
int dist = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
|
||||
- mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
|
||||
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
|
||||
dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
lower_32_bits(value));
|
||||
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
|
||||
dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
upper_32_bits(value));
|
||||
|
||||
}
|
||||
|
||||
static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev,
|
||||
int hubid)
|
||||
{
|
||||
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
|
||||
|
||||
mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base);
|
||||
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
(u32)(adev->gmc.gart_start >> 12));
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
(u32)(adev->gmc.gart_start >> 44));
|
||||
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
(u32)(adev->gmc.gart_end >> 12));
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
(u32)(adev->gmc.gart_end >> 44));
|
||||
}
|
||||
|
||||
static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
|
||||
int hubid)
|
||||
{
|
||||
uint64_t value;
|
||||
uint32_t tmp;
|
||||
|
||||
/* Program the AGP BAR */
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BASE,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
0);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_TOP,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
adev->gmc.agp_end >> 24);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BOT,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
adev->gmc.agp_start >> 24);
|
||||
|
||||
/* Program the system aperture low logical page number. */
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
|
||||
|
||||
/* Set default page address. */
|
||||
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
|
||||
adev->vm_manager.vram_base_offset;
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
(u32)(value >> 12));
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
(u32)(value >> 44));
|
||||
|
||||
/* Program "protection fault". */
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
(u32)(adev->dummy_page_addr >> 12));
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
(u32)((u64)adev->dummy_page_addr >> 44));
|
||||
|
||||
tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
|
||||
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
|
||||
}
|
||||
|
||||
static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid)
|
||||
{
|
||||
uint32_t tmp;
|
||||
|
||||
/* Setup TLB control */
|
||||
tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
|
||||
|
||||
tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
ENABLE_L1_TLB, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
SYSTEM_ACCESS_MODE, 3);
|
||||
tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
ENABLE_ADVANCED_DRIVER_MODEL, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
|
||||
tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
ECO_BITS, 0);
|
||||
tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
MTYPE, MTYPE_UC);/* XXX for emulation. */
|
||||
tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
ATC_EN, 1);
|
||||
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
|
||||
}
|
||||
|
||||
static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid)
|
||||
{
|
||||
uint32_t tmp;
|
||||
|
||||
/* Setup L2 cache */
|
||||
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
|
||||
ENABLE_L2_CACHE, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
|
||||
ENABLE_L2_FRAGMENT_PROCESSING, 1);
|
||||
/* XXX for emulation, refer to closed source code. */
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
|
||||
L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
|
||||
PDE_FAULT_CLASSIFICATION, 0);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
|
||||
CONTEXT1_IDENTITY_ACCESS_MODE, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
|
||||
IDENTITY_MODE_FRAGMENT_SIZE, 0);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
|
||||
|
||||
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2,
|
||||
INVALIDATE_ALL_L1_TLBS, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2,
|
||||
INVALIDATE_L2_CACHE, 1);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
|
||||
|
||||
tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT;
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
|
||||
|
||||
tmp = mmVML2PF0_VM_L2_CNTL4_DEFAULT;
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4,
|
||||
VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4,
|
||||
VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL4,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
|
||||
}
|
||||
|
||||
static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev,
|
||||
int hubid)
|
||||
{
|
||||
uint32_t tmp;
|
||||
|
||||
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
|
||||
}
|
||||
|
||||
static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev,
|
||||
int hubid)
|
||||
{
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0XFFFFFFFF);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0x0000000F);
|
||||
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
|
||||
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
|
||||
}
|
||||
|
||||
static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
|
||||
{
|
||||
uint32_t tmp;
|
||||
int i;
|
||||
|
||||
for (i = 0; i <= 14; i++) {
|
||||
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i);
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
|
||||
ENABLE_CONTEXT, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
|
||||
PAGE_TABLE_DEPTH,
|
||||
adev->vm_manager.num_level);
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
|
||||
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
|
||||
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
|
||||
1);
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
|
||||
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
|
||||
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
|
||||
READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
|
||||
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
|
||||
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
|
||||
PAGE_TABLE_BLOCK_SIZE,
|
||||
adev->vm_manager.block_size - 9);
|
||||
/* Send no-retry XNACK on fault to suppress VM fault storm. */
|
||||
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
|
||||
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i,
|
||||
tmp);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2,
|
||||
lower_32_bits(adev->vm_manager.max_pfn - 1));
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2,
|
||||
upper_32_bits(adev->vm_manager.max_pfn - 1));
|
||||
}
|
||||
}
|
||||
|
||||
static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev,
|
||||
int hubid)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < 18; ++i) {
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i,
|
||||
0xffffffff);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
|
||||
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i,
|
||||
0x1f);
|
||||
}
|
||||
}
|
||||
|
||||
int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
/*
|
||||
* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because
|
||||
* they are VF copy registers so vbios post doesn't
|
||||
* program them; for SRIOV the driver needs to program them
|
||||
*/
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE,
|
||||
i * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
adev->gmc.vram_start >> 24);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP,
|
||||
i * MMHUB_INSTANCE_REGISTER_OFFSET,
|
||||
adev->gmc.vram_end >> 24);
|
||||
}
|
||||
|
||||
/* GART Enable. */
|
||||
mmhub_v9_4_init_gart_aperture_regs(adev, i);
|
||||
mmhub_v9_4_init_system_aperture_regs(adev, i);
|
||||
mmhub_v9_4_init_tlb_regs(adev, i);
|
||||
mmhub_v9_4_init_cache_regs(adev, i);
|
||||
|
||||
mmhub_v9_4_enable_system_domain(adev, i);
|
||||
mmhub_v9_4_disable_identity_aperture(adev, i);
|
||||
mmhub_v9_4_setup_vmid_config(adev, i);
|
||||
mmhub_v9_4_program_invalidation(adev, i);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void mmhub_v9_4_gart_disable(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp;
|
||||
u32 i, j;
|
||||
|
||||
for (j = 0; j < MMHUB_NUM_INSTANCES; j++) {
|
||||
/* Disable all tables */
|
||||
for (i = 0; i < 16; i++)
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT0_CNTL,
|
||||
j * MMHUB_INSTANCE_REGISTER_OFFSET +
|
||||
i, 0);
|
||||
|
||||
/* Setup TLB control */
|
||||
tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
j * MMHUB_INSTANCE_REGISTER_OFFSET);
|
||||
tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
ENABLE_L1_TLB, 0);
|
||||
tmp = REG_SET_FIELD(tmp,
|
||||
VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
ENABLE_ADVANCED_DRIVER_MODEL, 0);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
|
||||
j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
|
||||
|
||||
/* Setup L2 cache */
|
||||
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
|
||||
j * MMHUB_INSTANCE_REGISTER_OFFSET);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
|
||||
ENABLE_L2_CACHE, 0);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
|
||||
j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3,
|
||||
j * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* mmhub_v9_4_set_fault_enable_default - update GART/VM fault handling
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @value: true redirects VM faults to the default page
|
||||
*/
|
||||
void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool value)
|
||||
{
|
||||
u32 tmp;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
|
||||
tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
i * MMHUB_INSTANCE_REGISTER_OFFSET);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT,
|
||||
value);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT,
|
||||
value);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
PDE1_PROTECTION_FAULT_ENABLE_DEFAULT,
|
||||
value);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
PDE2_PROTECTION_FAULT_ENABLE_DEFAULT,
|
||||
value);
|
||||
tmp = REG_SET_FIELD(tmp,
|
||||
VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
|
||||
value);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
NACK_PROTECTION_FAULT_ENABLE_DEFAULT,
|
||||
value);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
|
||||
value);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
VALID_PROTECTION_FAULT_ENABLE_DEFAULT,
|
||||
value);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
READ_PROTECTION_FAULT_ENABLE_DEFAULT,
|
||||
value);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT,
|
||||
value);
|
||||
tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT,
|
||||
value);
|
||||
if (!value) {
|
||||
tmp = REG_SET_FIELD(tmp,
|
||||
VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
CRASH_ON_NO_RETRY_FAULT, 1);
|
||||
tmp = REG_SET_FIELD(tmp,
|
||||
VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
CRASH_ON_RETRY_FAULT, 1);
|
||||
}
|
||||
|
||||
WREG32_SOC15_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
|
||||
i * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
void mmhub_v9_4_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] =
|
||||
{&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]};
|
||||
int i;
|
||||
|
||||
for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
|
||||
hub[i]->ctx0_ptb_addr_lo32 =
|
||||
SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) +
|
||||
i * MMHUB_INSTANCE_REGISTER_OFFSET;
|
||||
hub[i]->ctx0_ptb_addr_hi32 =
|
||||
SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) +
|
||||
i * MMHUB_INSTANCE_REGISTER_OFFSET;
|
||||
hub[i]->vm_inv_eng0_req =
|
||||
SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_INVALIDATE_ENG0_REQ) +
|
||||
i * MMHUB_INSTANCE_REGISTER_OFFSET;
|
||||
hub[i]->vm_inv_eng0_ack =
|
||||
SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_INVALIDATE_ENG0_ACK) +
|
||||
i * MMHUB_INSTANCE_REGISTER_OFFSET;
|
||||
hub[i]->vm_context0_cntl =
|
||||
SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmVML2VC0_VM_CONTEXT0_CNTL) +
|
||||
i * MMHUB_INSTANCE_REGISTER_OFFSET;
|
||||
hub[i]->vm_l2_pro_fault_status =
|
||||
SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_PROTECTION_FAULT_STATUS) +
|
||||
i * MMHUB_INSTANCE_REGISTER_OFFSET;
|
||||
hub[i]->vm_l2_pro_fault_cntl =
|
||||
SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) +
|
||||
i * MMHUB_INSTANCE_REGISTER_OFFSET;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef __MMHUB_V9_4_H__
|
||||
#define __MMHUB_V9_4_H__
|
||||
|
||||
u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev);
|
||||
int mmhub_v9_4_gart_enable(struct amdgpu_device *adev);
|
||||
void mmhub_v9_4_gart_disable(struct amdgpu_device *adev);
|
||||
void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev,
|
||||
bool value);
|
||||
void mmhub_v9_4_init(struct amdgpu_device *adev);
|
||||
|
||||
#endif
|
|
@ -449,20 +449,6 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
|
|||
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
|
||||
}
|
||||
|
||||
static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY;
|
||||
|
||||
/* Enable L1 security reg access mode by default, as non-security VF
|
||||
* will no longer be supported.
|
||||
*/
|
||||
adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC;
|
||||
|
||||
adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH;
|
||||
|
||||
adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING;
|
||||
}
|
||||
|
||||
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
|
||||
.req_full_gpu = xgpu_ai_request_full_gpu_access,
|
||||
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
|
||||
|
@ -471,5 +457,4 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
|
|||
.trans_msg = xgpu_ai_mailbox_trans_msg,
|
||||
.get_pp_clk = xgpu_ai_get_pp_clk,
|
||||
.force_dpm_level = xgpu_ai_force_dpm_level,
|
||||
.init_reg_access_mode = xgpu_ai_init_reg_access_mode,
|
||||
};
|
||||
|
|
|
@ -21,7 +21,8 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include <drm/drmP.h>
|
||||
#include <linux/pci.h>
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_ih.h"
|
||||
|
||||
|
|
|
@ -29,20 +29,8 @@
|
|||
|
||||
int navi10_reg_base_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int r, i;
|
||||
int i;
|
||||
|
||||
if (amdgpu_discovery) {
|
||||
r = amdgpu_discovery_reg_base_init(adev);
|
||||
if (r) {
|
||||
DRM_WARN("failed to init reg base from ip discovery table, "
|
||||
"fallback to legacy init method\n");
|
||||
goto legacy_init;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
legacy_init:
|
||||
for (i = 0 ; i < MAX_INSTANCE ; ++i) {
|
||||
adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
|
||||
adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include "amdgpu.h"
|
||||
#include "nv.h"
|
||||
|
||||
#include "soc15_common.h"
|
||||
#include "soc15_hw_ip.h"
|
||||
#include "navi12_ip_offset.h"
|
||||
|
||||
int navi12_reg_base_init(struct amdgpu_device *adev)
|
||||
{
|
||||
/* HW has more IP blocks, only initialize the blocks needed by the driver */
|
||||
uint32_t i;
|
||||
for (i = 0 ; i < MAX_INSTANCE ; ++i) {
|
||||
adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
|
||||
adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
|
||||
adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
|
||||
adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
|
||||
adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i]));
|
||||
adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
|
||||
adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
|
||||
adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i]));
|
||||
adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
|
||||
adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i]));
|
||||
adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
|
||||
adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
|
||||
adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
|
||||
adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
|
||||
adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
|
||||
adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
|
||||
}
|
||||
return 0;
|
||||
}
|
|
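/*
 * A note on how these tables are used: SOC15_REG_OFFSET() and the
 * RREG32_SOC15()/WREG32_SOC15() helpers resolve a register roughly as
 * adev->reg_offset[<ip>_HWIP][instance][segment] + register offset, with the
 * segment array coming from the matching *_ip_offset.h header. SDMA0/SDMA1
 * point at GC_BASE here because those SDMA registers live in the GC register
 * aperture on these parts.
 */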
@@ -0,0 +1,54 @@
/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "amdgpu.h"
#include "nv.h"

#include "soc15_common.h"
#include "soc15_hw_ip.h"
#include "navi14_ip_offset.h"

int navi14_reg_base_init(struct amdgpu_device *adev)
{
	int i;

	for (i = 0 ; i < MAX_INSTANCE ; ++i) {
		adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
		adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
		adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
		adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
		adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i]));
		adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
		adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
		adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i]));
		adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
		adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i]));
		adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
		adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
		adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
		adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
	}

	return 0;
}
@@ -92,7 +92,7 @@ static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instan
}

static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
					 int doorbell_index)
					 int doorbell_index, int instance)
{
	u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
@@ -31,6 +31,25 @@

#define smnNBIF_MGCG_CTRL_LCLK	0x1013a21c

/*
 * These are nbio v7_4_1 register masks. Temporarily define these here since
 * the nbio v7_4_1 header is incomplete.
 */
#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK	0x00001000L
#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK	0x00002000L
#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK	0x00004000L
#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK	0x00008000L
#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK	0x00010000L
#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK	0x00020000L

#define mmBIF_MMSCH1_DOORBELL_RANGE	0x01dc
#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX	2
//BIF_MMSCH1_DOORBELL_RANGE
#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET__SHIFT	0x2
#define BIF_MMSCH1_DOORBELL_RANGE__SIZE__SHIFT	0x10
#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK	0x00000FFCL
#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK	0x001F0000L

static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
{
	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
@@ -75,10 +94,24 @@ static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
					  bool use_doorbell, int doorbell_index, int doorbell_size)
{
	u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
			SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
	u32 reg, doorbell_range;

	u32 doorbell_range = RREG32(reg);
	if (instance < 2)
		reg = instance +
			SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE);
	else
		/*
		 * The register addresses of SDMA2~7 are not consecutive
		 * with those of SDMA0~1; they need an extra 4-dword offset.
		 *
		 * BIF_SDMA0_DOORBELL_RANGE:  0x3bc0
		 * BIF_SDMA1_DOORBELL_RANGE:  0x3bc4
		 * BIF_SDMA2_DOORBELL_RANGE:  0x3bd8
		 */
		reg = instance + 0x4 +
			SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE);

	doorbell_range = RREG32(reg);

	if (use_doorbell) {
		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
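	/*
	 * Worked example of the arithmetic above (assuming the mm* offsets are
	 * dword-indexed while the addresses in the comment are byte addresses):
	 * for instance 2, reg = 2 + 0x4 + mmBIF_SDMA0_DOORBELL_RANGE, i.e.
	 * 6 dwords (0x18 bytes) past 0x3bc0, which is the 0x3bd8 listed for
	 * BIF_SDMA2_DOORBELL_RANGE.
	 */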
@@ -89,6 +122,32 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan
	WREG32(reg, doorbell_range);
}

static void nbio_v7_4_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
					 int doorbell_index, int instance)
{
	u32 reg;
	u32 doorbell_range;

	if (instance)
		reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE);
	else
		reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);

	doorbell_range = RREG32(reg);

	if (use_doorbell) {
		doorbell_range = REG_SET_FIELD(doorbell_range,
					       BIF_MMSCH0_DOORBELL_RANGE, OFFSET,
					       doorbell_index);
		doorbell_range = REG_SET_FIELD(doorbell_range,
					       BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8);
	} else
		doorbell_range = REG_SET_FIELD(doorbell_range,
					       BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0);

	WREG32(reg, doorbell_range);
}

static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev,
					       bool enable)
{
@@ -220,6 +279,12 @@ static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
	.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,
	.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK,
	.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
	.ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK,
	.ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK,
	.ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK,
	.ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK,
	.ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK,
	.ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
};

static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev)

@@ -261,6 +326,7 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
	.hdp_flush = nbio_v7_4_hdp_flush,
	.get_memsize = nbio_v7_4_get_memsize,
	.sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range,
	.vcn_doorbell_range = nbio_v7_4_vcn_doorbell_range,
	.enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture,
	.enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture,
	.ih_doorbell_range = nbio_v7_4_ih_doorbell_range,
@ -23,7 +23,8 @@
|
|||
#include <linux/firmware.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/module.h>
|
||||
#include <drm/drmP.h>
|
||||
#include <linux/pci.h>
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_atombios.h"
|
||||
#include "amdgpu_ih.h"
|
||||
|
@ -289,6 +290,18 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev)
|
|||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static enum amd_reset_method
|
||||
nv_asic_reset_method(struct amdgpu_device *adev)
|
||||
{
|
||||
struct smu_context *smu = &adev->smu;
|
||||
|
||||
if (smu_baco_is_support(smu))
|
||||
return AMD_RESET_METHOD_BACO;
|
||||
else
|
||||
return AMD_RESET_METHOD_MODE1;
|
||||
}
|
||||
|
||||
static int nv_asic_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
|
||||
|
@ -303,7 +316,7 @@ static int nv_asic_reset(struct amdgpu_device *adev)
|
|||
int ret = 0;
|
||||
struct smu_context *smu = &adev->smu;
|
||||
|
||||
if (smu_baco_is_support(smu))
|
||||
if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
|
||||
ret = smu_baco_reset(smu);
|
||||
else
|
||||
ret = nv_asic_mode1_reset(adev);
|
||||
|
@ -363,23 +376,55 @@ static const struct amdgpu_ip_block_version nv_common_ip_block =
|
|||
.funcs = &nv_common_ip_funcs,
|
||||
};
|
||||
|
||||
int nv_set_ip_blocks(struct amdgpu_device *adev)
|
||||
static int nv_reg_base_init(struct amdgpu_device *adev)
|
||||
{
|
||||
/* Set IP register base before any HW register access */
|
||||
int r;
|
||||
|
||||
if (amdgpu_discovery) {
|
||||
r = amdgpu_discovery_reg_base_init(adev);
|
||||
if (r) {
|
||||
DRM_WARN("failed to init reg base from ip discovery table, "
|
||||
"fallback to legacy init method\n");
|
||||
goto legacy_init;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
legacy_init:
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
navi10_reg_base_init(adev);
|
||||
break;
|
||||
case CHIP_NAVI14:
|
||||
navi14_reg_base_init(adev);
|
||||
break;
|
||||
case CHIP_NAVI12:
|
||||
navi12_reg_base_init(adev);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nv_set_ip_blocks(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
|
||||
/* Set IP register base before any HW register access */
|
||||
r = nv_reg_base_init(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
adev->nbio_funcs = &nbio_v2_3_funcs;
|
||||
|
||||
adev->nbio_funcs->detect_hw_virt(adev);
|
||||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
|
||||
|
@ -402,6 +447,25 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
|
|||
if (adev->enable_mes)
|
||||
amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
|
||||
break;
|
||||
case CHIP_NAVI12:
|
||||
amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
|
||||
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
|
||||
is_support_sw_smu(adev))
|
||||
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
|
||||
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
|
||||
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
|
||||
else if (amdgpu_device_has_dc_support(adev))
|
||||
amdgpu_device_ip_block_add(adev, &dm_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
|
||||
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
|
||||
is_support_sw_smu(adev))
|
||||
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -496,6 +560,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
|
|||
.read_bios_from_rom = &nv_read_bios_from_rom,
|
||||
.read_register = &nv_read_register,
|
||||
.reset = &nv_asic_reset,
|
||||
.reset_method = &nv_asic_reset_method,
|
||||
.set_vga_state = &nv_vga_set_state,
|
||||
.get_xclk = &nv_get_xclk,
|
||||
.set_uvd_clocks = &nv_set_uvd_clocks,
|
||||
|
@ -556,6 +621,30 @@ static int nv_common_early_init(void *handle)
|
|||
AMD_PG_SUPPORT_ATHUB;
|
||||
adev->external_rev_id = adev->rev_id + 0x1;
|
||||
break;
|
||||
case CHIP_NAVI14:
|
||||
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_CGCG |
|
||||
AMD_CG_SUPPORT_IH_CG |
|
||||
AMD_CG_SUPPORT_HDP_MGCG |
|
||||
AMD_CG_SUPPORT_HDP_LS |
|
||||
AMD_CG_SUPPORT_SDMA_MGCG |
|
||||
AMD_CG_SUPPORT_SDMA_LS |
|
||||
AMD_CG_SUPPORT_MC_MGCG |
|
||||
AMD_CG_SUPPORT_MC_LS |
|
||||
AMD_CG_SUPPORT_ATHUB_MGCG |
|
||||
AMD_CG_SUPPORT_ATHUB_LS |
|
||||
AMD_CG_SUPPORT_VCN_MGCG |
|
||||
AMD_CG_SUPPORT_BIF_MGCG |
|
||||
AMD_CG_SUPPORT_BIF_LS;
|
||||
adev->pg_flags = AMD_PG_SUPPORT_VCN |
|
||||
AMD_PG_SUPPORT_VCN_DPG;
|
||||
adev->external_rev_id = adev->rev_id + 20;
|
||||
break;
|
||||
case CHIP_NAVI12:
|
||||
adev->cg_flags = 0;
|
||||
adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG;
|
||||
adev->external_rev_id = adev->rev_id + 0xa;
|
||||
break;
|
||||
default:
|
||||
/* FIXME: not supported yet */
|
||||
return -EINVAL;
|
||||
|
@ -748,6 +837,8 @@ static int nv_common_set_clockgating_state(void *handle,
|
|||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
adev->nbio_funcs->update_medium_grain_clock_gating(adev,
|
||||
state == AMD_CG_STATE_GATE ? true : false);
|
||||
adev->nbio_funcs->update_medium_grain_light_sleep(adev,
|
||||
|
|
|
@ -30,4 +30,6 @@ void nv_grbm_select(struct amdgpu_device *adev,
|
|||
u32 me, u32 pipe, u32 queue, u32 vmid);
|
||||
int nv_set_ip_blocks(struct amdgpu_device *adev);
|
||||
int navi10_reg_base_init(struct amdgpu_device *adev);
|
||||
int navi14_reg_base_init(struct amdgpu_device *adev);
|
||||
int navi12_reg_base_init(struct amdgpu_device *adev);
|
||||
#endif
|
||||
|
|
|
@ -233,8 +233,15 @@ enum psp_gfx_fw_type {
|
|||
GFX_FW_TYPE_RLCP_CAM = 46, /* RLCP CAM NV */
|
||||
GFX_FW_TYPE_RLC_SPP_CAM_EXT = 47, /* RLC SPP CAM EXT NV */
|
||||
GFX_FW_TYPE_RLX6_DRAM_BOOT = 48, /* RLX6 DRAM BOOT NV */
|
||||
GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV */
|
||||
GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV */
|
||||
GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV + RN */
|
||||
GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV + RN */
|
||||
GFX_FW_TYPE_DMUB = 51, /* DMUB RN */
|
||||
GFX_FW_TYPE_SDMA2 = 52, /* SDMA2 MI */
|
||||
GFX_FW_TYPE_SDMA3 = 53, /* SDMA3 MI */
|
||||
GFX_FW_TYPE_SDMA4 = 54, /* SDMA4 MI */
|
||||
GFX_FW_TYPE_SDMA5 = 55, /* SDMA5 MI */
|
||||
GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */
|
||||
GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */
|
||||
GFX_FW_TYPE_MAX
|
||||
};
|
||||
|
||||
|
|
|
@ -43,6 +43,12 @@ MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
|
|||
MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi10_sos.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi10_asd.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi14_sos.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi14_asd.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi12_sos.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi12_asd.bin");
|
||||
MODULE_FIRMWARE("amdgpu/arcturus_sos.bin");
|
||||
MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
|
||||
|
||||
/* address block */
|
||||
#define smnMP1_FIRMWARE_FLAGS 0x3010024
|
||||
|
@ -60,6 +66,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
|
|||
int err = 0;
|
||||
const struct psp_firmware_header_v1_0 *sos_hdr;
|
||||
const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
|
||||
const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
|
||||
const struct psp_firmware_header_v1_0 *asd_hdr;
|
||||
const struct ta_firmware_header_v1_0 *ta_hdr;
|
||||
|
||||
|
@ -72,6 +79,15 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
|
|||
case CHIP_NAVI10:
|
||||
chip_name = "navi10";
|
||||
break;
|
||||
case CHIP_NAVI14:
|
||||
chip_name = "navi14";
|
||||
break;
|
||||
case CHIP_NAVI12:
|
||||
chip_name = "navi12";
|
||||
break;
|
||||
case CHIP_ARCTURUS:
|
||||
chip_name = "arcturus";
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
@ -107,6 +123,12 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
|
|||
adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
|
||||
le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes);
|
||||
}
|
||||
if (sos_hdr->header.header_version_minor == 2) {
|
||||
sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data;
|
||||
adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb_size_bytes);
|
||||
adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
|
||||
le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
dev_err(adev->dev,
|
||||
|
@ -158,6 +180,9 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
|
|||
}
|
||||
break;
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
case CHIP_NAVI12:
|
||||
case CHIP_ARCTURUS:
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
|
|
|
@ -636,7 +636,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
|
|||
|
||||
static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
|
||||
{
|
||||
if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455)
|
||||
if (amdgpu_sriov_vf(psp->adev))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
|
|
@ -34,6 +34,18 @@
|
|||
#include "sdma0/sdma0_4_2_sh_mask.h"
|
||||
#include "sdma1/sdma1_4_2_offset.h"
|
||||
#include "sdma1/sdma1_4_2_sh_mask.h"
|
||||
#include "sdma2/sdma2_4_2_2_offset.h"
|
||||
#include "sdma2/sdma2_4_2_2_sh_mask.h"
|
||||
#include "sdma3/sdma3_4_2_2_offset.h"
|
||||
#include "sdma3/sdma3_4_2_2_sh_mask.h"
|
||||
#include "sdma4/sdma4_4_2_2_offset.h"
|
||||
#include "sdma4/sdma4_4_2_2_sh_mask.h"
|
||||
#include "sdma5/sdma5_4_2_2_offset.h"
|
||||
#include "sdma5/sdma5_4_2_2_sh_mask.h"
|
||||
#include "sdma6/sdma6_4_2_2_offset.h"
|
||||
#include "sdma6/sdma6_4_2_2_sh_mask.h"
|
||||
#include "sdma7/sdma7_4_2_2_offset.h"
|
||||
#include "sdma7/sdma7_4_2_2_sh_mask.h"
|
||||
#include "hdp/hdp_4_0_offset.h"
|
||||
#include "sdma0/sdma0_4_1_default.h"
|
||||
|
||||
|
@ -55,6 +67,7 @@ MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
|
|||
MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
|
||||
MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");
|
||||
MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
|
||||
MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin");
|
||||
|
||||
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
|
||||
#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
|
||||
|
@ -202,25 +215,120 @@ static const struct soc15_reg_golden golden_settings_sdma_rv2[] =
|
|||
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001)
|
||||
};
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_sdma_arct[] =
|
||||
{
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
|
||||
SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002)
|
||||
};
|
||||
|
||||
static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
|
||||
u32 instance, u32 offset)
|
||||
{
|
||||
return ( 0 == instance ? (adev->reg_offset[SDMA0_HWIP][0][0] + offset) :
|
||||
(adev->reg_offset[SDMA1_HWIP][0][0] + offset));
|
||||
switch (instance) {
|
||||
case 0:
|
||||
return (adev->reg_offset[SDMA0_HWIP][0][0] + offset);
|
||||
case 1:
|
||||
return (adev->reg_offset[SDMA1_HWIP][0][0] + offset);
|
||||
case 2:
|
||||
return (adev->reg_offset[SDMA2_HWIP][0][1] + offset);
|
||||
case 3:
|
||||
return (adev->reg_offset[SDMA3_HWIP][0][1] + offset);
|
||||
case 4:
|
||||
return (adev->reg_offset[SDMA4_HWIP][0][1] + offset);
|
||||
case 5:
|
||||
return (adev->reg_offset[SDMA5_HWIP][0][1] + offset);
|
||||
case 6:
|
||||
return (adev->reg_offset[SDMA6_HWIP][0][1] + offset);
|
||||
case 7:
|
||||
return (adev->reg_offset[SDMA7_HWIP][0][1] + offset);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned sdma_v4_0_seq_to_irq_id(int seq_num)
|
||||
{
|
||||
switch (seq_num) {
|
||||
case 0:
|
||||
return SOC15_IH_CLIENTID_SDMA0;
|
||||
case 1:
|
||||
return SOC15_IH_CLIENTID_SDMA1;
|
||||
case 2:
|
||||
return SOC15_IH_CLIENTID_SDMA2;
|
||||
case 3:
|
||||
return SOC15_IH_CLIENTID_SDMA3;
|
||||
case 4:
|
||||
return SOC15_IH_CLIENTID_SDMA4;
|
||||
case 5:
|
||||
return SOC15_IH_CLIENTID_SDMA5;
|
||||
case 6:
|
||||
return SOC15_IH_CLIENTID_SDMA6;
|
||||
case 7:
|
||||
return SOC15_IH_CLIENTID_SDMA7;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int sdma_v4_0_irq_id_to_seq(unsigned client_id)
|
||||
{
|
||||
switch (client_id) {
|
||||
case SOC15_IH_CLIENTID_SDMA0:
|
||||
return 0;
|
||||
case SOC15_IH_CLIENTID_SDMA1:
|
||||
return 1;
|
||||
case SOC15_IH_CLIENTID_SDMA2:
|
||||
return 2;
|
||||
case SOC15_IH_CLIENTID_SDMA3:
|
||||
return 3;
|
||||
case SOC15_IH_CLIENTID_SDMA4:
|
||||
return 4;
|
||||
case SOC15_IH_CLIENTID_SDMA5:
|
||||
return 5;
|
||||
case SOC15_IH_CLIENTID_SDMA6:
|
||||
return 6;
|
||||
case SOC15_IH_CLIENTID_SDMA7:
|
||||
return 7;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
|
||||
{
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA10:
|
||||
if (!amdgpu_virt_support_skip_setting(adev)) {
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_4,
|
||||
ARRAY_SIZE(golden_settings_sdma_4));
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_vg10,
|
||||
ARRAY_SIZE(golden_settings_sdma_vg10));
|
||||
}
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_4,
|
||||
ARRAY_SIZE(golden_settings_sdma_4));
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_vg10,
|
||||
ARRAY_SIZE(golden_settings_sdma_vg10));
|
||||
break;
|
||||
case CHIP_VEGA12:
|
||||
soc15_program_register_sequence(adev,
|
||||
|
@ -241,6 +349,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
|
|||
golden_settings_sdma1_4_2,
|
||||
ARRAY_SIZE(golden_settings_sdma1_4_2));
|
||||
break;
|
||||
case CHIP_ARCTURUS:
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_arct,
|
||||
ARRAY_SIZE(golden_settings_sdma_arct));
|
||||
break;
|
||||
case CHIP_RAVEN:
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_4_1,
|
||||
|
@ -259,6 +372,43 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
|
|||
}
|
||||
}
|
||||
|
||||
static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
|
||||
{
|
||||
int err = 0;
|
||||
const struct sdma_firmware_header_v1_0 *hdr;
|
||||
|
||||
err = amdgpu_ucode_validate(sdma_inst->fw);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
|
||||
sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
|
||||
sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
|
||||
|
||||
if (sdma_inst->feature_version >= 20)
|
||||
sdma_inst->burst_nop = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
if (adev->sdma.instance[i].fw != NULL)
|
||||
release_firmware(adev->sdma.instance[i].fw);
|
||||
|
||||
		/* Arcturus shares the same FW memory across
		   all SDMA instances */
|
||||
if (adev->asic_type == CHIP_ARCTURUS)
|
||||
break;
|
||||
}
|
||||
|
||||
memset((void*)adev->sdma.instance, 0,
|
||||
sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
|
||||
}
|
||||
|
||||
/**
|
||||
* sdma_v4_0_init_microcode - load ucode images from disk
|
||||
*
|
||||
|
@ -278,7 +428,6 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
|
|||
int err = 0, i;
|
||||
struct amdgpu_firmware_info *info = NULL;
|
||||
const struct common_firmware_header *header = NULL;
|
||||
const struct sdma_firmware_header_v1_0 *hdr;
|
||||
|
||||
DRM_DEBUG("\n");
|
||||
|
||||
|
@ -300,30 +449,49 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
|
|||
else
|
||||
chip_name = "raven";
|
||||
break;
|
||||
case CHIP_ARCTURUS:
|
||||
chip_name = "arcturus";
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
if (i == 0)
|
||||
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
|
||||
else
|
||||
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
|
||||
err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
|
||||
if (err)
|
||||
goto out;
|
||||
err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
|
||||
if (err)
|
||||
goto out;
|
||||
hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
|
||||
adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
|
||||
adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
|
||||
if (adev->sdma.instance[i].feature_version >= 20)
|
||||
adev->sdma.instance[i].burst_nop = true;
|
||||
DRM_DEBUG("psp_load == '%s'\n",
|
||||
adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
|
||||
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
|
||||
|
||||
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
|
||||
err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
for (i = 1; i < adev->sdma.num_instances; i++) {
|
||||
if (adev->asic_type == CHIP_ARCTURUS) {
|
||||
				/* Arcturus will leverage the same FW memory
				   for every SDMA instance */
|
||||
memcpy((void*)&adev->sdma.instance[i],
|
||||
(void*)&adev->sdma.instance[0],
|
||||
sizeof(struct amdgpu_sdma_instance));
|
||||
}
|
||||
else {
|
||||
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
|
||||
|
||||
err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
DRM_DEBUG("psp_load == '%s'\n",
|
||||
adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
|
||||
|
||||
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
|
||||
info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
|
||||
info->fw = adev->sdma.instance[i].fw;
|
||||
|
@ -332,13 +500,11 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
|
|||
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (err) {
|
||||
DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
release_firmware(adev->sdma.instance[i].fw);
|
||||
adev->sdma.instance[i].fw = NULL;
|
||||
}
|
||||
sdma_v4_0_destroy_inst_ctx(adev);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
@@ -561,10 +727,7 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
	u32 ref_and_mask = 0;
	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;

	if (ring->me == 0)
		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
	else
		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
	ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
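	/*
	 * The single shift works only if the GPU_HDP_FLUSH_DONE bits for
	 * SDMA0..SDMA7 are consecutive; with sdma2..sdma7 mapped to
	 * RSVD_ENG0..ENG5 (0x1000..0x20000) in nbio_v7_4_hdp_flush_reg, and
	 * assuming the SDMA0/SDMA1 done bits sit directly below 0x1000 as in
	 * the SOC15 NBIO headers, ref_and_mask_sdma0 << ring->me picks the
	 * right bit for any instance.
	 */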

	sdma_v4_0_wait_reg_mem(ring, 0, 1,
			       adev->nbio_funcs->get_hdp_flush_done_offset(adev),
@ -620,26 +783,27 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
|
|||
*/
|
||||
static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
|
||||
struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
|
||||
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
|
||||
u32 rb_cntl, ib_cntl;
|
||||
int i;
|
||||
|
||||
if ((adev->mman.buffer_funcs_ring == sdma0) ||
|
||||
(adev->mman.buffer_funcs_ring == sdma1))
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, false);
|
||||
int i, unset = 0;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
sdma[i] = &adev->sdma.instance[i].ring;
|
||||
|
||||
if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, false);
|
||||
unset = 1;
|
||||
}
|
||||
|
||||
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
|
||||
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
|
||||
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
|
||||
ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
|
||||
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
|
||||
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
|
||||
}
|
||||
|
||||
sdma0->sched.ready = false;
|
||||
sdma1->sched.ready = false;
|
||||
sdma[i]->sched.ready = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -663,16 +827,20 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
|
|||
*/
|
||||
static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page;
|
||||
struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page;
|
||||
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
|
||||
u32 rb_cntl, ib_cntl;
|
||||
int i;
|
||||
|
||||
if ((adev->mman.buffer_funcs_ring == sdma0) ||
|
||||
(adev->mman.buffer_funcs_ring == sdma1))
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, false);
|
||||
bool unset = false;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
sdma[i] = &adev->sdma.instance[i].page;
|
||||
|
||||
if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
|
||||
(unset == false)) {
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, false);
|
||||
unset = true;
|
||||
}
|
||||
|
||||
rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
|
||||
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
|
||||
RB_ENABLE, 0);
|
||||
|
@ -681,10 +849,9 @@ static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
|
|||
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
|
||||
IB_ENABLE, 0);
|
||||
WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
|
||||
}
|
||||
|
||||
sdma0->sched.ready = false;
|
||||
sdma1->sched.ready = false;
|
||||
sdma[i]->sched.ready = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1475,6 +1642,8 @@ static int sdma_v4_0_early_init(void *handle)
|
|||
|
||||
if (adev->asic_type == CHIP_RAVEN)
|
||||
adev->sdma.num_instances = 1;
|
||||
else if (adev->asic_type == CHIP_ARCTURUS)
|
||||
adev->sdma.num_instances = 8;
|
||||
else
|
||||
adev->sdma.num_instances = 2;
|
||||
|
||||
|
@ -1499,6 +1668,7 @@ static int sdma_v4_0_early_init(void *handle)
|
|||
}
|
||||
|
||||
static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
|
||||
struct ras_err_data *err_data,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
|
||||
static int sdma_v4_0_late_init(void *handle)
|
||||
|
@ -1518,7 +1688,7 @@ static int sdma_v4_0_late_init(void *handle)
|
|||
.sub_block_index = 0,
|
||||
.name = "sdma",
|
||||
};
|
||||
int r;
|
||||
int r, i;
|
||||
|
||||
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
|
||||
amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
|
||||
|
@ -1575,14 +1745,11 @@ static int sdma_v4_0_late_init(void *handle)
|
|||
if (r)
|
||||
goto sysfs;
|
||||
resume:
|
||||
r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0);
|
||||
if (r)
|
||||
goto irq;
|
||||
|
||||
r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1);
|
||||
if (r) {
|
||||
amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0);
|
||||
goto irq;
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE0 + i);
|
||||
if (r)
|
||||
goto irq;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -1606,28 +1773,22 @@ static int sdma_v4_0_sw_init(void *handle)
|
|||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
/* SDMA trap event */
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP,
|
||||
&adev->sdma.trap_irq);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* SDMA trap event */
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP,
|
||||
&adev->sdma.trap_irq);
|
||||
if (r)
|
||||
return r;
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
|
||||
SDMA0_4_0__SRCID__SDMA_TRAP,
|
||||
&adev->sdma.trap_irq);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* SDMA SRAM ECC event */
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
|
||||
&adev->sdma.ecc_irq);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* SDMA SRAM ECC event */
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_SRAM_ECC,
|
||||
&adev->sdma.ecc_irq);
|
||||
if (r)
|
||||
return r;
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
|
||||
SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
|
||||
&adev->sdma.ecc_irq);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
|
@ -1641,11 +1802,8 @@ static int sdma_v4_0_sw_init(void *handle)
|
|||
ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
|
||||
|
||||
sprintf(ring->name, "sdma%d", i);
|
||||
r = amdgpu_ring_init(adev, ring, 1024,
|
||||
&adev->sdma.trap_irq,
|
||||
(i == 0) ?
|
||||
AMDGPU_SDMA_IRQ_INSTANCE0 :
|
||||
AMDGPU_SDMA_IRQ_INSTANCE1);
|
||||
r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE0 + i);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
@ -1663,9 +1821,7 @@ static int sdma_v4_0_sw_init(void *handle)
|
|||
sprintf(ring->name, "page%d", i);
|
||||
r = amdgpu_ring_init(adev, ring, 1024,
|
||||
&adev->sdma.trap_irq,
|
||||
(i == 0) ?
|
||||
AMDGPU_SDMA_IRQ_INSTANCE0 :
|
||||
AMDGPU_SDMA_IRQ_INSTANCE1);
|
||||
AMDGPU_SDMA_IRQ_INSTANCE0 + i);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
@ -1701,10 +1857,7 @@ static int sdma_v4_0_sw_fini(void *handle)
|
|||
amdgpu_ring_fini(&adev->sdma.instance[i].page);
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
release_firmware(adev->sdma.instance[i].fw);
|
||||
adev->sdma.instance[i].fw = NULL;
|
||||
}
|
||||
sdma_v4_0_destroy_inst_ctx(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1718,7 +1871,8 @@ static int sdma_v4_0_hw_init(void *handle)
|
|||
adev->powerplay.pp_funcs->set_powergating_by_smu)
|
||||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
|
||||
|
||||
sdma_v4_0_init_golden_registers(adev);
|
||||
if (!amdgpu_sriov_vf(adev))
|
||||
sdma_v4_0_init_golden_registers(adev);
|
||||
|
||||
r = sdma_v4_0_start(adev);
|
||||
|
||||
|
@ -1728,12 +1882,15 @@ static int sdma_v4_0_hw_init(void *handle)
|
|||
static int sdma_v4_0_hw_fini(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
int i;
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return 0;
|
||||
|
||||
amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0);
|
||||
amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1);
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE0 + i);
|
||||
}
|
||||
|
||||
sdma_v4_0_ctx_switch_enable(adev, false);
|
||||
sdma_v4_0_enable(adev, false);
|
||||
|
@ -1776,15 +1933,17 @@ static bool sdma_v4_0_is_idle(void *handle)
|
|||
|
||||
static int sdma_v4_0_wait_for_idle(void *handle)
|
||||
{
|
||||
unsigned i;
|
||||
u32 sdma0, sdma1;
|
||||
unsigned i, j;
|
||||
u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG);
|
||||
sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG);
|
||||
|
||||
if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
|
||||
for (j = 0; j < adev->sdma.num_instances; j++) {
|
||||
sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG);
|
||||
if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK))
|
||||
break;
|
||||
}
|
||||
if (j == adev->sdma.num_instances)
|
||||
return 0;
|
||||
udelay(1);
|
||||
}
|
||||
|
@ -1820,17 +1979,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
|
|||
uint32_t instance;
|
||||
|
||||
DRM_DEBUG("IH: SDMA trap\n");
|
||||
switch (entry->client_id) {
|
||||
case SOC15_IH_CLIENTID_SDMA0:
|
||||
instance = 0;
|
||||
break;
|
||||
case SOC15_IH_CLIENTID_SDMA1:
|
||||
instance = 1;
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
|
||||
switch (entry->ring_id) {
|
||||
case 0:
|
||||
amdgpu_fence_process(&adev->sdma.instance[instance].ring);
|
||||
|
@ -1851,20 +2000,15 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
|
|||
}
|
||||
|
||||
static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
|
||||
struct ras_err_data *err_data,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
uint32_t instance, err_source;
|
||||
uint32_t err_source;
|
||||
int instance;
|
||||
|
||||
switch (entry->client_id) {
|
||||
case SOC15_IH_CLIENTID_SDMA0:
|
||||
instance = 0;
|
||||
break;
|
||||
case SOC15_IH_CLIENTID_SDMA1:
|
||||
instance = 1;
|
||||
break;
|
||||
default:
|
||||
instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
|
||||
if (instance < 0)
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (entry->src_id) {
|
||||
case SDMA0_4_0__SRCID__SDMA_SRAM_ECC:
|
||||
|
@ -1881,7 +2025,7 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
|
|||
|
||||
amdgpu_ras_reset_gpu(adev, 0);
|
||||
|
||||
return AMDGPU_RAS_UE;
|
||||
return AMDGPU_RAS_SUCCESS;
|
||||
}
|
||||
|
||||
static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev,
|
||||
|
@ -1910,16 +2054,9 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
|
|||
|
||||
DRM_ERROR("Illegal instruction in SDMA command stream\n");
|
||||
|
||||
switch (entry->client_id) {
|
||||
case SOC15_IH_CLIENTID_SDMA0:
|
||||
instance = 0;
|
||||
break;
|
||||
case SOC15_IH_CLIENTID_SDMA1:
|
||||
instance = 1;
|
||||
break;
|
||||
default:
|
||||
instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
|
||||
if (instance < 0)
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (entry->ring_id) {
|
||||
case 0:
|
||||
|
@ -1936,14 +2073,10 @@ static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
|
|||
{
|
||||
u32 sdma_edc_config;
|
||||
|
||||
u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ?
|
||||
sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_EDC_CONFIG) :
|
||||
sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_EDC_CONFIG);
|
||||
|
||||
sdma_edc_config = RREG32(reg_offset);
|
||||
sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG);
|
||||
sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE,
|
||||
state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
|
||||
WREG32(reg_offset, sdma_edc_config);
|
||||
WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -2133,7 +2266,43 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
|
|||
.align_mask = 0xf,
|
||||
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
|
||||
.support_64bit_ptrs = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.vmhub = AMDGPU_MMHUB_0,
|
||||
.get_rptr = sdma_v4_0_ring_get_rptr,
|
||||
.get_wptr = sdma_v4_0_ring_get_wptr,
|
||||
.set_wptr = sdma_v4_0_ring_set_wptr,
|
||||
.emit_frame_size =
|
||||
6 + /* sdma_v4_0_ring_emit_hdp_flush */
|
||||
3 + /* hdp invalidate */
|
||||
6 + /* sdma_v4_0_ring_emit_pipeline_sync */
|
||||
/* sdma_v4_0_ring_emit_vm_flush */
|
||||
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
|
||||
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
|
||||
10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
|
||||
.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
|
||||
.emit_ib = sdma_v4_0_ring_emit_ib,
|
||||
.emit_fence = sdma_v4_0_ring_emit_fence,
|
||||
.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
|
||||
.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
|
||||
.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
|
||||
.test_ring = sdma_v4_0_ring_test_ring,
|
||||
.test_ib = sdma_v4_0_ring_test_ib,
|
||||
.insert_nop = sdma_v4_0_ring_insert_nop,
|
||||
.pad_ib = sdma_v4_0_ring_pad_ib,
|
||||
.emit_wreg = sdma_v4_0_ring_emit_wreg,
|
||||
.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
|
||||
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
|
||||
};
|
||||
|
||||
/*
 * On Arcturus, SDMA instances 5~7 use a different vmhub type (AMDGPU_MMHUB_1),
 * so create an individual constant ring_funcs for those instances.
 */
|
||||
static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = {
|
||||
.type = AMDGPU_RING_TYPE_SDMA,
|
||||
.align_mask = 0xf,
|
||||
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
|
||||
.support_64bit_ptrs = true,
|
||||
.vmhub = AMDGPU_MMHUB_1,
|
||||
.get_rptr = sdma_v4_0_ring_get_rptr,
|
||||
.get_wptr = sdma_v4_0_ring_get_wptr,
|
||||
.set_wptr = sdma_v4_0_ring_set_wptr,
|
||||
|
@ -2165,7 +2334,39 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
|
|||
.align_mask = 0xf,
|
||||
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
|
||||
.support_64bit_ptrs = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.vmhub = AMDGPU_MMHUB_0,
|
||||
.get_rptr = sdma_v4_0_ring_get_rptr,
|
||||
.get_wptr = sdma_v4_0_page_ring_get_wptr,
|
||||
.set_wptr = sdma_v4_0_page_ring_set_wptr,
|
||||
.emit_frame_size =
|
||||
6 + /* sdma_v4_0_ring_emit_hdp_flush */
|
||||
3 + /* hdp invalidate */
|
||||
6 + /* sdma_v4_0_ring_emit_pipeline_sync */
|
||||
/* sdma_v4_0_ring_emit_vm_flush */
|
||||
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
|
||||
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
|
||||
10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
|
||||
.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
|
||||
.emit_ib = sdma_v4_0_ring_emit_ib,
|
||||
.emit_fence = sdma_v4_0_ring_emit_fence,
|
||||
.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
|
||||
.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
|
||||
.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
|
||||
.test_ring = sdma_v4_0_ring_test_ring,
|
||||
.test_ib = sdma_v4_0_ring_test_ib,
|
||||
.insert_nop = sdma_v4_0_ring_insert_nop,
|
||||
.pad_ib = sdma_v4_0_ring_pad_ib,
|
||||
.emit_wreg = sdma_v4_0_ring_emit_wreg,
|
||||
.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
|
||||
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
|
||||
};
|
||||
|
||||
static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = {
|
||||
.type = AMDGPU_RING_TYPE_SDMA,
|
||||
.align_mask = 0xf,
|
||||
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
|
||||
.support_64bit_ptrs = true,
|
||||
.vmhub = AMDGPU_MMHUB_1,
|
||||
.get_rptr = sdma_v4_0_ring_get_rptr,
|
||||
.get_wptr = sdma_v4_0_page_ring_get_wptr,
|
||||
.set_wptr = sdma_v4_0_page_ring_set_wptr,
|
||||
|
@ -2197,10 +2398,20 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
|
||||
if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
|
||||
adev->sdma.instance[i].ring.funcs =
|
||||
&sdma_v4_0_ring_funcs_2nd_mmhub;
|
||||
else
|
||||
adev->sdma.instance[i].ring.funcs =
|
||||
&sdma_v4_0_ring_funcs;
|
||||
adev->sdma.instance[i].ring.me = i;
|
||||
if (adev->sdma.has_page_queue) {
|
||||
adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs;
|
||||
if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
|
||||
adev->sdma.instance[i].page.funcs =
|
||||
&sdma_v4_0_page_ring_funcs_2nd_mmhub;
|
||||
else
|
||||
adev->sdma.instance[i].page.funcs =
|
||||
&sdma_v4_0_page_ring_funcs;
|
||||
adev->sdma.instance[i].page.me = i;
|
||||
}
|
||||
}
|
||||
|
@ -2224,10 +2435,23 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
|
|||
|
||||
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
|
||||
switch (adev->sdma.num_instances) {
|
||||
case 1:
|
||||
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
|
||||
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
|
||||
break;
|
||||
case 8:
|
||||
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
|
||||
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
|
||||
break;
|
||||
case 2:
|
||||
default:
|
||||
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
|
||||
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
|
||||
break;
|
||||
}
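	/*
	 * num_types is effectively the count of SDMA instances that can raise
	 * the interrupt, assuming the AMDGPU_SDMA_IRQ_INSTANCE* enum is
	 * sequential from 0: INSTANCE1 == 1 for single-SDMA Raven,
	 * INSTANCE2 == 2 for the common two-instance parts, and
	 * AMDGPU_SDMA_IRQ_LAST covers all eight Arcturus instances.
	 */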
|
||||
adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
|
||||
adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
|
||||
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
|
||||
adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,8 +21,11 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include <linux/delay.h>
|
||||
#include <linux/firmware.h>
|
||||
#include <drm/drmP.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_ucode.h"
|
||||
#include "amdgpu_trace.h"
|
||||
|
@ -42,6 +45,12 @@
|
|||
MODULE_FIRMWARE("amdgpu/navi10_sdma.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin");
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/navi14_sdma.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin");
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/navi12_sdma.bin");
|
||||
MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin");
|
||||
|
||||
#define SDMA1_REG_OFFSET 0x600
|
||||
#define SDMA0_HYP_DEC_REG_START 0x5880
|
||||
#define SDMA0_HYP_DEC_REG_END 0x5893
|
||||
|
@ -59,7 +68,7 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = {
|
|||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
|
@ -71,7 +80,7 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = {
|
|||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
|
@ -80,6 +89,18 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = {
|
|||
};
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_sdma_nv10[] = {
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
|
||||
};
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_sdma_nv14[] = {
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
};
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_sdma_nv12[] = {
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
|
||||
};
|
||||
|
||||
static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
|
||||
|
@ -111,6 +132,22 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
|
|||
golden_settings_sdma_nv10,
|
||||
(const u32)ARRAY_SIZE(golden_settings_sdma_nv10));
|
||||
break;
|
||||
case CHIP_NAVI14:
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_5,
|
||||
(const u32)ARRAY_SIZE(golden_settings_sdma_5));
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_nv14,
|
||||
(const u32)ARRAY_SIZE(golden_settings_sdma_nv14));
|
||||
break;
|
||||
case CHIP_NAVI12:
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_5,
|
||||
(const u32)ARRAY_SIZE(golden_settings_sdma_5));
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_nv12,
|
||||
(const u32)ARRAY_SIZE(golden_settings_sdma_nv12));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -143,6 +180,12 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
|
|||
case CHIP_NAVI10:
|
||||
chip_name = "navi10";
|
||||
break;
|
||||
case CHIP_NAVI14:
|
||||
chip_name = "navi14";
|
||||
break;
|
||||
case CHIP_NAVI12:
|
||||
chip_name = "navi12";
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
@ -861,7 +904,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
|
|||
if (amdgpu_emu_mode == 1)
|
||||
msleep(1);
|
||||
else
|
||||
DRM_UDELAY(1);
|
||||
udelay(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
|
@ -1316,7 +1359,7 @@ static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
|
|||
if (ring->trail_seq ==
|
||||
le32_to_cpu(*(ring->trail_fence_cpu_addr)))
|
||||
break;
|
||||
DRM_UDELAY(1);
|
||||
udelay(1);
|
||||
}
|
||||
|
||||
if (i >= adev->usec_timeout) {
|
||||
|
@ -1472,6 +1515,7 @@ static int sdma_v5_0_set_clockgating_state(void *handle,
|
|||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_NAVI10:
|
||||
case CHIP_NAVI14:
|
||||
sdma_v5_0_update_medium_grain_clock_gating(adev,
|
||||
state == AMD_CG_STATE_GATE ? true : false);
|
||||
sdma_v5_0_update_medium_grain_light_sleep(adev,
|
||||
|
@ -1532,7 +1576,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
|
|||
.align_mask = 0xf,
|
||||
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
|
||||
.support_64bit_ptrs = true,
|
||||
.vmhub = AMDGPU_GFXHUB,
|
||||
.vmhub = AMDGPU_GFXHUB_0,
|
||||
.get_rptr = sdma_v5_0_ring_get_rptr,
|
||||
.get_wptr = sdma_v5_0_ring_get_wptr,
|
||||
.set_wptr = sdma_v5_0_ring_set_wptr,
|
||||
|
|
|
@ -1186,6 +1186,12 @@ static int si_asic_reset(struct amdgpu_device *adev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static enum amd_reset_method
|
||||
si_asic_reset_method(struct amdgpu_device *adev)
|
||||
{
|
||||
return AMD_RESET_METHOD_LEGACY;
|
||||
}
|
||||
|
||||
static u32 si_get_config_memsize(struct amdgpu_device *adev)
|
||||
{
|
||||
return RREG32(mmCONFIG_MEMSIZE);
|
||||
|
@ -1394,6 +1400,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
|
|||
.read_bios_from_rom = &si_read_bios_from_rom,
|
||||
.read_register = &si_read_register,
|
||||
.reset = &si_asic_reset,
|
||||
.reset_method = &si_asic_reset_method,
|
||||
.set_vga_state = &si_vga_set_state,
|
||||
.get_xclk = &si_get_xclk,
|
||||
.set_uvd_clocks = &si_set_uvd_clocks,
|
||||
|
|
|
@ -63,6 +63,7 @@
|
|||
#include "uvd_v7_0.h"
|
||||
#include "vce_v4_0.h"
|
||||
#include "vcn_v1_0.h"
|
||||
#include "vcn_v2_5.h"
|
||||
#include "dce_virtual.h"
|
||||
#include "mxgpu_ai.h"
|
||||
#include "amdgpu_smu.h"
|
||||
|
@@ -115,6 +116,49 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
{
	unsigned long flags, address, data;
	u64 r;
	address = adev->nbio_funcs->get_pcie_index_offset(adev);
	data = adev->nbio_funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	/* read low 32 bit */
	WREG32(address, reg);
	(void)RREG32(address);
	r = RREG32(data);

	/* read high 32 bit*/
	WREG32(address, reg + 4);
	(void)RREG32(address);
	r |= ((u64)RREG32(data) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
	return r;
}

static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)
{
	unsigned long flags, address, data;

	address = adev->nbio_funcs->get_pcie_index_offset(adev);
	data = adev->nbio_funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	/* write low 32 bit */
	WREG32(address, reg);
	(void)RREG32(address);
	WREG32(data, (u32)(v & 0xffffffffULL));
	(void)RREG32(data);

	/* write high 32 bit */
	WREG32(address, reg + 4);
	(void)RREG32(address);
	WREG32(data, (u32)(v >> 32));
	(void)RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
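/*
 * Note on the two helpers above: instead of readq()/writeq() (the
 * "readq/writeq stuff" called out in the merge description), each 64-bit
 * PCIE register access is split into two 32-bit index/data transactions done
 * back to back under pcie_idx_lock, with a throwaway RREG32(address) after
 * every index write, presumably to post the index update before the data
 * register is touched. A minimal sketch of the same pattern, using a
 * hypothetical indirect_rreg32() helper:
 *
 *	u64 lo = indirect_rreg32(adev, reg);
 *	u64 hi = indirect_rreg32(adev, reg + 4);
 *	u64 val = lo | (hi << 32);
 */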

static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
{
	unsigned long flags, address, data;
@ -464,12 +508,14 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int soc15_asic_reset(struct amdgpu_device *adev)
|
||||
static enum amd_reset_method
|
||||
soc15_asic_reset_method(struct amdgpu_device *adev)
|
||||
{
|
||||
int ret;
|
||||
bool baco_reset;
|
||||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_RAVEN:
|
||||
return AMD_RESET_METHOD_MODE2;
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
soc15_asic_get_baco_capability(adev, &baco_reset);
@@ -493,6 +539,16 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
}
|
||||
|
||||
if (baco_reset)
|
||||
return AMD_RESET_METHOD_BACO;
|
||||
else
|
||||
return AMD_RESET_METHOD_MODE1;
|
||||
}
|
||||
|
||||
static int soc15_asic_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (soc15_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
|
||||
ret = soc15_asic_baco_reset(adev);
|
||||
else
|
||||
ret = soc15_asic_mode1_reset(adev);
@@ -586,21 +642,25 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_VEGA20:
|
||||
vega20_reg_base_init(adev);
|
||||
break;
|
||||
case CHIP_ARCTURUS:
|
||||
arct_reg_base_init(adev);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (adev->asic_type == CHIP_VEGA20)
|
||||
if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)
|
||||
adev->gmc.xgmi.supported = true;
|
||||
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
adev->nbio_funcs = &nbio_v7_0_funcs;
|
||||
else if (adev->asic_type == CHIP_VEGA20)
|
||||
else if (adev->asic_type == CHIP_VEGA20 ||
|
||||
adev->asic_type == CHIP_ARCTURUS)
|
||||
adev->nbio_funcs = &nbio_v7_4_funcs;
|
||||
else
|
||||
adev->nbio_funcs = &nbio_v6_1_funcs;
|
||||
|
||||
if (adev->asic_type == CHIP_VEGA20)
|
||||
if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)
|
||||
adev->df_funcs = &df_v3_6_funcs;
|
||||
else
|
||||
adev->df_funcs = &df_v1_7_funcs;
@@ -672,6 +732,17 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
#endif
|
||||
amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block);
|
||||
break;
|
||||
case CHIP_ARCTURUS:
|
||||
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
|
||||
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
|
||||
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
@@ -688,7 +759,7 @@ static void soc15_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
|
||||
{
|
||||
if (!ring || !ring->funcs->emit_wreg)
|
||||
WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
|
||||
WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
|
||||
else
|
||||
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
|
||||
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
@@ -714,14 +785,9 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,

/* Set the 2 events that we wish to watch, defined above */
|
||||
/* Reg 40 is # received msgs */
|
||||
/* Reg 104 is # of posted requests sent */
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
|
||||
/* Pre-VG20, Reg 104 is # of posted requests sent. On VG20 it's 108 */
|
||||
if (adev->asic_type == CHIP_VEGA20)
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK,
|
||||
EVENT1_SEL, 108);
|
||||
else
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK,
|
||||
EVENT1_SEL, 104);
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
|
||||
|
||||
/* Write to enable desired perf counters */
|
||||
WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr);
@@ -751,6 +817,55 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
*count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
|
||||
}
|
||||
|
||||
static void vega20_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
|
||||
uint64_t *count1)
|
||||
{
|
||||
uint32_t perfctr = 0;
|
||||
uint64_t cnt0_of, cnt1_of;
|
||||
int tmp;
|
||||
|
||||
/* This reports 0 on APUs, so return to avoid writing/reading registers
|
||||
* that may or may not be different from their GPU counterparts
|
||||
*/
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
return;
|
||||
|
||||
/* Set the 2 events that we wish to watch, defined above */
|
||||
/* Reg 40 is # received msgs */
|
||||
/* Reg 108 is # of posted requests sent on VG20 */
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3,
|
||||
EVENT0_SEL, 40);
|
||||
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3,
|
||||
EVENT1_SEL, 108);
|
||||
|
||||
/* Write to enable desired perf counters */
|
||||
WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctr);
|
||||
/* Zero out and enable the perf counters
|
||||
* Write 0x5:
|
||||
* Bit 0 = Start all counters(1)
|
||||
* Bit 2 = Global counter reset enable(1)
|
||||
*/
|
||||
WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005);
|
||||
|
||||
msleep(1000);
|
||||
|
||||
/* Load the shadow and disable the perf counters
|
||||
* Write 0x2:
|
||||
* Bit 0 = Stop counters(0)
|
||||
* Bit 1 = Load the shadow counters(1)
|
||||
*/
|
||||
WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002);
|
||||
|
||||
/* Read register values to get any >32bit overflow */
|
||||
tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3);
|
||||
cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER0_UPPER);
|
||||
cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER1_UPPER);
|
||||
|
||||
/* Get the values and add the overflow */
|
||||
*count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | (cnt0_of << 32);
|
||||
*count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK3) | (cnt1_of << 32);
|
||||
}
|
||||
|
||||
static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 sol_reg;
@@ -792,6 +907,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.read_bios_from_rom = &soc15_read_bios_from_rom,
|
||||
.read_register = &soc15_read_register,
|
||||
.reset = &soc15_asic_reset,
|
||||
.reset_method = &soc15_asic_reset_method,
|
||||
.set_vga_state = &soc15_vga_set_state,
|
||||
.get_xclk = &soc15_get_xclk,
|
||||
.set_uvd_clocks = &soc15_set_uvd_clocks,
@@ -821,9 +937,10 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.invalidate_hdp = &soc15_invalidate_hdp,
|
||||
.need_full_reset = &soc15_need_full_reset,
|
||||
.init_doorbell_index = &vega20_doorbell_index_init,
|
||||
.get_pcie_usage = &soc15_get_pcie_usage,
|
||||
.get_pcie_usage = &vega20_get_pcie_usage,
|
||||
.need_reset_on_init = &soc15_need_reset_on_init,
|
||||
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
|
||||
.reset_method = &soc15_asic_reset_method
|
||||
};
|
||||
|
||||
static int soc15_common_early_init(void *handle)
@@ -837,6 +954,8 @@ static int soc15_common_early_init(void *handle)
adev->smc_wreg = NULL;
|
||||
adev->pcie_rreg = &soc15_pcie_rreg;
|
||||
adev->pcie_wreg = &soc15_pcie_wreg;
|
||||
adev->pcie_rreg64 = &soc15_pcie_rreg64;
|
||||
adev->pcie_wreg64 = &soc15_pcie_wreg64;
|
||||
adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg;
|
||||
adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg;
|
||||
adev->didt_rreg = &soc15_didt_rreg;
@@ -998,6 +1117,12 @@ static int soc15_common_early_init(void *handle)
AMD_PG_SUPPORT_CP |
|
||||
AMD_PG_SUPPORT_RLC_SMU_HS;
|
||||
break;
|
||||
case CHIP_ARCTURUS:
|
||||
adev->asic_funcs = &vega20_asic_funcs;
|
||||
adev->cg_flags = 0;
|
||||
adev->pg_flags = 0;
|
||||
adev->external_rev_id = adev->rev_id + 0x32;
|
||||
break;
|
||||
default:
|
||||
/* FIXME: not supported yet */
|
||||
return -EINVAL;
@@ -1043,21 +1168,18 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)
int i;
|
||||
struct amdgpu_ring *ring;
|
||||
|
||||
/* Two reasons to skip
|
||||
* 1, Host driver already programmed them
|
||||
* 2, To avoid registers program violations in SR-IOV
|
||||
*/
|
||||
if (!amdgpu_virt_support_skip_setting(adev)) {
|
||||
/* sdma/ih doorbell range are programed by hypervisor */
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
adev->nbio_funcs->sdma_doorbell_range(adev, i,
|
||||
ring->use_doorbell, ring->doorbell_index,
|
||||
adev->doorbell_index.sdma_doorbell_range);
|
||||
}
|
||||
}
|
||||
|
||||
adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
|
||||
adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
|
||||
adev->irq.ih.doorbell_index);
|
||||
}
|
||||
}
|
||||
|
||||
static int soc15_common_hw_init(void *handle)
@@ -77,6 +77,7 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,

int vega10_reg_base_init(struct amdgpu_device *adev);
|
||||
int vega20_reg_base_init(struct amdgpu_device *adev);
|
||||
int arct_reg_base_init(struct amdgpu_device *adev);
|
||||
|
||||
void vega10_doorbell_index_init(struct amdgpu_device *adev);
|
||||
void vega20_doorbell_index_init(struct amdgpu_device *adev);
@@ -69,9 +69,10 @@
} \
|
||||
} while (0)
|
||||
|
||||
#define AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(a) (amdgpu_sriov_vf((a)) && !amdgpu_sriov_runtime((a)))
|
||||
#define WREG32_RLC(reg, value) \
|
||||
do { \
|
||||
if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
|
||||
if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \
|
||||
uint32_t i = 0; \
|
||||
uint32_t retries = 50000; \
|
||||
uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \
@@ -96,7 +97,7 @@
#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
|
||||
do { \
|
||||
uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
|
||||
if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
|
||||
if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \
|
||||
uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \
|
||||
uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \
|
||||
uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \
@@ -0,0 +1,255 @@
/*
|
||||
* Copyright 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include "umc_v6_1.h"
|
||||
#include "amdgpu_ras.h"
|
||||
#include "amdgpu.h"
|
||||
|
||||
#include "rsmu/rsmu_0_0_2_offset.h"
|
||||
#include "rsmu/rsmu_0_0_2_sh_mask.h"
|
||||
#include "umc/umc_6_1_1_offset.h"
|
||||
#include "umc/umc_6_1_1_sh_mask.h"
|
||||
|
||||
#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10
|
||||
|
||||
/*
|
||||
* (addr / 256) * 8192, the higher 26 bits in ErrorAddr
|
||||
* is the index of 8KB block
|
||||
*/
|
||||
#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
|
||||
/* channel index is the index of 256B block */
|
||||
#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
|
||||
/* offset in 256B block */
|
||||
#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL)
|
||||
|
||||
const uint32_t
|
||||
umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
|
||||
{2, 18, 11, 27}, {4, 20, 13, 29},
|
||||
{1, 17, 8, 24}, {7, 23, 14, 30},
|
||||
{10, 26, 3, 19}, {12, 28, 5, 21},
|
||||
{9, 25, 0, 16}, {15, 31, 6, 22}
|
||||
};
|
||||
|
||||
static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev,
|
||||
uint32_t umc_instance)
|
||||
{
|
||||
uint32_t rsmu_umc_index;
|
||||
|
||||
rsmu_umc_index = RREG32_SOC15(RSMU, 0,
|
||||
mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
|
||||
rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
|
||||
RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
|
||||
RSMU_UMC_INDEX_MODE_EN, 1);
|
||||
rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
|
||||
RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
|
||||
RSMU_UMC_INDEX_INSTANCE, umc_instance);
|
||||
rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
|
||||
RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
|
||||
RSMU_UMC_INDEX_WREN, 1 << umc_instance);
|
||||
WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
|
||||
rsmu_umc_index);
|
||||
}
|
||||
|
||||
static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
|
||||
RSMU_UMC_INDEX_MODE_EN, 0);
|
||||
}
|
||||
|
||||
static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
|
||||
uint32_t umc_reg_offset,
|
||||
unsigned long *error_count)
|
||||
{
|
||||
uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
|
||||
uint32_t ecc_err_cnt, ecc_err_cnt_addr;
|
||||
uint64_t mc_umc_status;
|
||||
uint32_t mc_umc_status_addr;
|
||||
|
||||
ecc_err_cnt_sel_addr =
|
||||
SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
|
||||
ecc_err_cnt_addr =
|
||||
SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
|
||||
mc_umc_status_addr =
|
||||
SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
|
||||
|
||||
/* select the lower chip and check the error count */
|
||||
ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
|
||||
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
|
||||
EccErrCntCsSel, 0);
|
||||
WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
|
||||
ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
|
||||
*error_count +=
|
||||
(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
|
||||
UMC_V6_1_CE_CNT_INIT);
|
||||
/* clear the lower chip err count */
|
||||
WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
|
||||
|
||||
/* select the higher chip and check the err counter */
|
||||
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
|
||||
EccErrCntCsSel, 1);
|
||||
WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
|
||||
ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
|
||||
*error_count +=
|
||||
(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
|
||||
UMC_V6_1_CE_CNT_INIT);
|
||||
/* clear the higher chip err count */
|
||||
WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
|
||||
|
||||
/* check for SRAM correctable error
|
||||
MCUMC_STATUS is a 64 bit register */
|
||||
mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
|
||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
|
||||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
||||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
|
||||
*error_count += 1;
|
||||
}
|
||||
|
||||
static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev,
|
||||
uint32_t umc_reg_offset,
|
||||
unsigned long *error_count)
|
||||
{
|
||||
uint64_t mc_umc_status;
|
||||
uint32_t mc_umc_status_addr;
|
||||
|
||||
mc_umc_status_addr =
|
||||
SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
|
||||
|
||||
/* check the MCUMC_STATUS */
|
||||
mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
|
||||
if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
|
||||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
|
||||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
||||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
|
||||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
|
||||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
|
||||
*error_count += 1;
|
||||
}
|
||||
|
||||
static void umc_v6_1_query_error_count(struct amdgpu_device *adev,
|
||||
struct ras_err_data *err_data, uint32_t umc_reg_offset,
|
||||
uint32_t channel_index)
|
||||
{
|
||||
umc_v6_1_query_correctable_error_count(adev, umc_reg_offset,
|
||||
&(err_data->ce_count));
|
||||
umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset,
|
||||
&(err_data->ue_count));
|
||||
}
|
||||
|
||||
static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
|
||||
void *ras_error_status)
|
||||
{
|
||||
amdgpu_umc_for_each_channel(umc_v6_1_query_error_count);
|
||||
}
|
||||
|
||||
static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
|
||||
struct ras_err_data *err_data,
|
||||
uint32_t umc_reg_offset, uint32_t channel_index)
|
||||
{
|
||||
uint32_t lsb, mc_umc_status_addr;
|
||||
uint64_t mc_umc_status, err_addr;
|
||||
|
||||
mc_umc_status_addr =
|
||||
SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
|
||||
|
||||
/* skip error address process if -ENOMEM */
|
||||
if (!err_data->err_addr) {
|
||||
/* clear umc status */
|
||||
WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
|
||||
return;
|
||||
}
|
||||
|
||||
mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
|
||||
|
||||
/* calculate error address if ue/ce error is detected */
|
||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
||||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
||||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
|
||||
err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4);
|
||||
|
||||
/* the lowest lsb bits should be ignored */
|
||||
lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
|
||||
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
|
||||
err_addr &= ~((0x1ULL << lsb) - 1);
|
||||
|
||||
/* translate umc channel address to soc pa, 3 parts are included */
|
||||
err_data->err_addr[err_data->err_addr_cnt] =
|
||||
ADDR_OF_8KB_BLOCK(err_addr) |
|
||||
ADDR_OF_256B_BLOCK(channel_index) |
|
||||
OFFSET_IN_256B_BLOCK(err_addr);
|
||||
|
||||
err_data->err_addr_cnt++;
|
||||
}
|
||||
|
||||
/* clear umc status */
|
||||
WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
|
||||
}
|
||||
|
||||
static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
|
||||
void *ras_error_status)
|
||||
{
|
||||
amdgpu_umc_for_each_channel(umc_v6_1_query_error_address);
|
||||
}
|
||||
|
||||
static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev,
|
||||
struct ras_err_data *err_data,
|
||||
uint32_t umc_reg_offset, uint32_t channel_index)
|
||||
{
|
||||
uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
|
||||
uint32_t ecc_err_cnt_addr;
|
||||
|
||||
ecc_err_cnt_sel_addr =
|
||||
SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
|
||||
ecc_err_cnt_addr =
|
||||
SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
|
||||
|
||||
/* select the lower chip and check the error count */
|
||||
ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
|
||||
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
|
||||
EccErrCntCsSel, 0);
|
||||
/* set ce error interrupt type to APIC based interrupt */
|
||||
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
|
||||
EccErrInt, 0x1);
|
||||
WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
|
||||
/* set error count to initial value */
|
||||
WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
|
||||
|
||||
/* select the higher chip and check the err counter */
|
||||
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
|
||||
EccErrCntCsSel, 1);
|
||||
WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
|
||||
WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
|
||||
}
|
||||
|
||||
static void umc_v6_1_ras_init(struct amdgpu_device *adev)
|
||||
{
|
||||
void *ras_error_status = NULL;
|
||||
|
||||
amdgpu_umc_for_each_channel(umc_v6_1_ras_init_per_channel);
|
||||
}
|
||||
|
||||
const struct amdgpu_umc_funcs umc_v6_1_funcs = {
|
||||
.ras_init = umc_v6_1_ras_init,
|
||||
.query_ras_error_count = umc_v6_1_query_ras_error_count,
|
||||
.query_ras_error_address = umc_v6_1_query_ras_error_address,
|
||||
.enable_umc_index_mode = umc_v6_1_enable_umc_index_mode,
|
||||
.disable_umc_index_mode = umc_v6_1_disable_umc_index_mode,
|
||||
};
@@ -0,0 +1,51 @@
/*
|
||||
* Copyright 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef __UMC_V6_1_H__
|
||||
#define __UMC_V6_1_H__
|
||||
|
||||
#include "soc15_common.h"
|
||||
#include "amdgpu.h"
|
||||
|
||||
/* HBM Memory Channel Width */
|
||||
#define UMC_V6_1_HBM_MEMORY_CHANNEL_WIDTH 128
|
||||
/* number of umc channel instance with memory map register access */
|
||||
#define UMC_V6_1_CHANNEL_INSTANCE_NUM 4
|
||||
/* number of umc instance with memory map register access */
|
||||
#define UMC_V6_1_UMC_INSTANCE_NUM 8
|
||||
/* total channel instances in one umc block */
|
||||
#define UMC_V6_1_TOTAL_CHANNEL_NUM (UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM)
|
||||
/* UMC regiser per channel offset */
|
||||
#define UMC_V6_1_PER_CHANNEL_OFFSET 0x800
|
||||
|
||||
/* EccErrCnt max value */
|
||||
#define UMC_V6_1_CE_CNT_MAX 0xffff
|
||||
/* umc ce interrupt threshold */
|
||||
#define UMC_V6_1_CE_INT_THRESHOLD 0xffff
|
||||
/* umc ce count initial value */
|
||||
#define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD)
|
||||
|
||||
extern const struct amdgpu_umc_funcs umc_v6_1_funcs;
|
||||
extern const uint32_t
|
||||
umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM];
|
||||
|
||||
#endif
@@ -1763,7 +1763,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.align_mask = 0xf,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.vmhub = AMDGPU_MMHUB_0,
|
||||
.get_rptr = uvd_v7_0_ring_get_rptr,
|
||||
.get_wptr = uvd_v7_0_ring_get_wptr,
|
||||
.set_wptr = uvd_v7_0_ring_set_wptr,
@@ -1796,7 +1796,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
.nop = HEVC_ENC_CMD_NO_OP,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.vmhub = AMDGPU_MMHUB_0,
|
||||
.get_rptr = uvd_v7_0_enc_ring_get_rptr,
|
||||
.get_wptr = uvd_v7_0_enc_ring_get_wptr,
|
||||
.set_wptr = uvd_v7_0_enc_ring_set_wptr,
@@ -1070,7 +1070,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
.nop = VCE_CMD_NO_OP,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.vmhub = AMDGPU_MMHUB_0,
|
||||
.get_rptr = vce_v4_0_ring_get_rptr,
|
||||
.get_wptr = vce_v4_0_ring_get_wptr,
|
||||
.set_wptr = vce_v4_0_ring_set_wptr,
@@ -63,6 +63,7 @@ static int vcn_v1_0_early_init(void *handle)
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
adev->vcn.num_vcn_inst = 1;
|
||||
adev->vcn.num_enc_rings = 2;
|
||||
|
||||
vcn_v1_0_set_dec_ring_funcs(adev);
@@ -87,20 +88,21 @@ static int vcn_v1_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
/* VCN DEC TRAP */
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.irq);
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
|
||||
VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst->irq);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* VCN ENC TRAP */
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
|
||||
&adev->vcn.irq);
|
||||
&adev->vcn.inst->irq);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* VCN JPEG TRAP */
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq);
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.inst->irq);
|
||||
if (r)
|
||||
return r;

@@ -122,39 +124,39 @@ static int vcn_v1_0_sw_init(void *handle)
if (r)
|
||||
return r;
|
||||
|
||||
ring = &adev->vcn.ring_dec;
|
||||
ring = &adev->vcn.inst->ring_dec;
|
||||
sprintf(ring->name, "vcn_dec");
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
adev->vcn.internal.scratch9 = adev->vcn.external.scratch9 =
|
||||
adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 =
|
||||
SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
|
||||
adev->vcn.internal.data0 = adev->vcn.external.data0 =
|
||||
adev->vcn.internal.data0 = adev->vcn.inst->external.data0 =
|
||||
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
|
||||
adev->vcn.internal.data1 = adev->vcn.external.data1 =
|
||||
adev->vcn.internal.data1 = adev->vcn.inst->external.data1 =
|
||||
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
|
||||
adev->vcn.internal.cmd = adev->vcn.external.cmd =
|
||||
adev->vcn.internal.cmd = adev->vcn.inst->external.cmd =
|
||||
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
|
||||
adev->vcn.internal.nop = adev->vcn.external.nop =
|
||||
adev->vcn.internal.nop = adev->vcn.inst->external.nop =
|
||||
SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
|
||||
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
|
||||
ring = &adev->vcn.ring_enc[i];
|
||||
ring = &adev->vcn.inst->ring_enc[i];
|
||||
sprintf(ring->name, "vcn_enc%d", i);
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
ring = &adev->vcn.ring_jpeg;
|
||||
ring = &adev->vcn.inst->ring_jpeg;
|
||||
sprintf(ring->name, "vcn_jpeg");
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
|
||||
adev->vcn.internal.jpeg_pitch = adev->vcn.external.jpeg_pitch =
|
||||
adev->vcn.internal.jpeg_pitch = adev->vcn.inst->external.jpeg_pitch =
|
||||
SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);
|
||||
|
||||
return 0;
@@ -191,7 +193,7 @@ static int vcn_v1_0_sw_fini(void *handle)
static int vcn_v1_0_hw_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
|
||||
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
|
||||
int i, r;
|
||||
|
||||
r = amdgpu_ring_test_helper(ring);
@@ -199,14 +201,14 @@ static int vcn_v1_0_hw_init(void *handle)
goto done;
|
||||
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
|
||||
ring = &adev->vcn.ring_enc[i];
|
||||
ring = &adev->vcn.inst->ring_enc[i];
|
||||
ring->sched.ready = true;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
goto done;
|
||||
}
|
||||
|
||||
ring = &adev->vcn.ring_jpeg;
|
||||
ring = &adev->vcn.inst->ring_jpeg;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
goto done;
@@ -229,7 +231,7 @@ done:
static int vcn_v1_0_hw_fini(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
|
||||
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
|
||||
|
||||
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
|
||||
RREG32_SOC15(VCN, 0, mmUVD_STATUS))
@@ -304,9 +306,9 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
offset = 0;
|
||||
} else {
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
|
||||
lower_32_bits(adev->vcn.gpu_addr));
|
||||
lower_32_bits(adev->vcn.inst->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
|
||||
upper_32_bits(adev->vcn.gpu_addr));
|
||||
upper_32_bits(adev->vcn.inst->gpu_addr));
|
||||
offset = size;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
|
||||
AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
|
||||
|
@ -316,17 +318,17 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
|
|||
|
||||
/* cache window 1: stack */
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
|
||||
lower_32_bits(adev->vcn.gpu_addr + offset));
|
||||
lower_32_bits(adev->vcn.inst->gpu_addr + offset));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
|
||||
upper_32_bits(adev->vcn.gpu_addr + offset));
|
||||
upper_32_bits(adev->vcn.inst->gpu_addr + offset));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
|
||||
|
||||
/* cache window 2: context */
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
|
||||
lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
|
||||
lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
|
||||
upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
|
||||
upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
|
||||
|
||||
|
@ -374,9 +376,9 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
|
|||
offset = 0;
|
||||
} else {
|
||||
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
|
||||
lower_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0);
|
||||
lower_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0);
|
||||
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
|
||||
upper_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0);
|
||||
upper_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0);
|
||||
offset = size;
|
||||
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
|
||||
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0);
|
||||
|
@ -386,9 +388,9 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
|
|||
|
||||
/* cache window 1: stack */
|
||||
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
|
||||
lower_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0);
|
||||
lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0);
|
||||
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
|
||||
upper_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0);
|
||||
upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0);
|
||||
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0,
|
||||
0xFFFFFFFF, 0);
|
||||
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE,
|
||||
|
@ -396,10 +398,10 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
|
|||
|
||||
/* cache window 2: context */
|
||||
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
|
||||
lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
|
||||
lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
|
||||
0xFFFFFFFF, 0);
|
||||
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
|
||||
upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
|
||||
upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
|
||||
0xFFFFFFFF, 0);
|
||||
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0);
|
||||
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE,
|
||||
|
@ -779,7 +781,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
|
|||
*/
|
||||
static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
|
||||
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
|
||||
uint32_t rb_bufsz, tmp;
|
||||
uint32_t lmi_swap_cntl;
|
||||
int i, j, r;
|
||||
|
@ -932,21 +934,21 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
|
|||
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
|
||||
~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
|
||||
|
||||
ring = &adev->vcn.ring_enc[0];
|
||||
ring = &adev->vcn.inst->ring_enc[0];
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
|
||||
|
||||
ring = &adev->vcn.ring_enc[1];
|
||||
ring = &adev->vcn.inst->ring_enc[1];
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
|
||||
|
||||
ring = &adev->vcn.ring_jpeg;
|
||||
ring = &adev->vcn.inst->ring_jpeg;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
|
||||
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
|
||||
|
@ -968,7 +970,7 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
|
|||
|
||||
static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
|
||||
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
|
||||
uint32_t rb_bufsz, tmp;
|
||||
uint32_t lmi_swap_cntl;
|
||||
|
||||
|
@ -1106,7 +1108,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
|
|||
~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
|
||||
|
||||
/* initialize JPEG wptr */
|
||||
ring = &adev->vcn.ring_jpeg;
|
||||
ring = &adev->vcn.inst->ring_jpeg;
|
||||
ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
|
||||
|
||||
/* copy patch commands to the jpeg ring */
|
||||
|
@ -1255,21 +1257,21 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
|
|||
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
|
||||
|
||||
/* Restore */
|
||||
ring = &adev->vcn.ring_enc[0];
|
||||
ring = &adev->vcn.inst->ring_enc[0];
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
|
||||
|
||||
ring = &adev->vcn.ring_enc[1];
|
||||
ring = &adev->vcn.inst->ring_enc[1];
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
|
||||
|
||||
ring = &adev->vcn.ring_dec;
|
||||
ring = &adev->vcn.inst->ring_dec;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
|
||||
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||
|
@ -1315,7 +1317,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
|
|||
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
|
||||
|
||||
/* Restore */
|
||||
ring = &adev->vcn.ring_jpeg;
|
||||
ring = &adev->vcn.inst->ring_jpeg;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
|
||||
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
|
||||
|
@ -1329,7 +1331,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
|
|||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
|
||||
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
|
||||
|
||||
ring = &adev->vcn.ring_dec;
|
||||
ring = &adev->vcn.inst->ring_dec;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
|
||||
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||
|
@ -1596,7 +1598,7 @@ static uint64_t vcn_v1_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
|
|||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
|
||||
if (ring == &adev->vcn.ring_enc[0])
|
||||
if (ring == &adev->vcn.inst->ring_enc[0])
|
||||
return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
|
||||
else
|
||||
return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
|
||||
|
@ -1613,7 +1615,7 @@ static uint64_t vcn_v1_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
|
|||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
|
||||
if (ring == &adev->vcn.ring_enc[0])
|
||||
if (ring == &adev->vcn.inst->ring_enc[0])
|
||||
return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
|
||||
else
|
||||
return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
|
||||
|
@ -1630,7 +1632,7 @@ static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
|
|||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
|
||||
if (ring == &adev->vcn.ring_enc[0])
|
||||
if (ring == &adev->vcn.inst->ring_enc[0])
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR,
|
||||
lower_32_bits(ring->wptr));
|
||||
else
|
||||
|
@ -2114,16 +2116,16 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
|
|||
|
||||
switch (entry->src_id) {
|
||||
case 124:
|
||||
amdgpu_fence_process(&adev->vcn.ring_dec);
|
||||
amdgpu_fence_process(&adev->vcn.inst->ring_dec);
|
||||
break;
|
||||
case 119:
|
||||
amdgpu_fence_process(&adev->vcn.ring_enc[0]);
|
||||
amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]);
|
||||
break;
|
||||
case 120:
|
||||
amdgpu_fence_process(&adev->vcn.ring_enc[1]);
|
||||
amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);
|
||||
break;
|
||||
case 126:
|
||||
amdgpu_fence_process(&adev->vcn.ring_jpeg);
|
||||
amdgpu_fence_process(&adev->vcn.inst->ring_jpeg);
|
||||
break;
|
||||
default:
|
||||
DRM_ERROR("Unhandled interrupt: %d %d\n",
|
||||
|
@ -2198,7 +2200,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
|
|||
.align_mask = 0xf,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.vmhub = AMDGPU_MMHUB_0,
|
||||
.get_rptr = vcn_v1_0_dec_ring_get_rptr,
|
||||
.get_wptr = vcn_v1_0_dec_ring_get_wptr,
|
||||
.set_wptr = vcn_v1_0_dec_ring_set_wptr,
|
||||
|
@ -2232,7 +2234,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
|
|||
.nop = VCN_ENC_CMD_NO_OP,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.vmhub = AMDGPU_MMHUB_0,
|
||||
.get_rptr = vcn_v1_0_enc_ring_get_rptr,
|
||||
.get_wptr = vcn_v1_0_enc_ring_get_wptr,
|
||||
.set_wptr = vcn_v1_0_enc_ring_set_wptr,
|
||||
|
@ -2264,7 +2266,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
|
|||
.nop = PACKET0(0x81ff, 0),
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.vmhub = AMDGPU_MMHUB_0,
|
||||
.extra_dw = 64,
|
||||
.get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
|
||||
.get_wptr = vcn_v1_0_jpeg_ring_get_wptr,
|
||||
|
@ -2295,7 +2297,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
|
|||
|
||||
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
|
||||
adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
|
||||
DRM_INFO("VCN decode is enabled in VM mode\n");
|
||||
}
|
||||
|
||||
|
@ -2304,14 +2306,14 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i)
|
||||
adev->vcn.ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;
|
||||
adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;
|
||||
|
||||
DRM_INFO("VCN encode is enabled in VM mode\n");
|
||||
}
|
||||
|
||||
static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs;
|
||||
adev->vcn.inst->ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs;
|
||||
DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
|
||||
}
|
||||
|
||||
|
@ -2322,8 +2324,8 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
|
|||
|
||||
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2;
|
||||
adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs;
|
||||
adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2;
|
||||
adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs;
|
||||
}
|
||||
|
||||
const struct amdgpu_ip_block_version vcn_v1_0_ip_block =
@@ -22,7 +22,7 @@
*/
|
||||
|
||||
#include <linux/firmware.h>
|
||||
#include <drm/drmP.h>
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_vcn.h"
|
||||
#include "soc15.h"
@@ -92,6 +92,7 @@ static int vcn_v2_0_early_init(void *handle)
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
adev->vcn.num_vcn_inst = 1;
|
||||
adev->vcn.num_enc_rings = 2;
|
||||
|
||||
vcn_v2_0_set_dec_ring_funcs(adev);
@@ -118,7 +119,7 @@ static int vcn_v2_0_sw_init(void *handle)
/* VCN DEC TRAP */
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
|
||||
VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT,
|
||||
&adev->vcn.irq);
|
||||
&adev->vcn.inst->irq);
|
||||
if (r)
|
||||
return r;

@@ -126,15 +127,14 @@ static int vcn_v2_0_sw_init(void *handle)
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
|
||||
i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
|
||||
&adev->vcn.irq);
|
||||
&adev->vcn.inst->irq);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* VCN JPEG TRAP */
|
||||
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
|
||||
VCN_2_0__SRCID__JPEG_DECODE,
|
||||
&adev->vcn.irq);
|
||||
VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst->irq);
|
||||
if (r)
|
||||
return r;

@@ -156,49 +156,56 @@ static int vcn_v2_0_sw_init(void *handle)
if (r)
|
||||
return r;
|
||||
|
||||
ring = &adev->vcn.ring_dec;
|
||||
ring = &adev->vcn.inst->ring_dec;
|
||||
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
|
||||
|
||||
sprintf(ring->name, "vcn_dec");
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
|
||||
adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
|
||||
adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
|
||||
adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
|
||||
adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
|
||||
adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
|
||||
|
||||
adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
|
||||
adev->vcn.external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
|
||||
adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
|
||||
adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
|
||||
adev->vcn.external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
|
||||
adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
|
||||
adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
|
||||
adev->vcn.external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
|
||||
adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
|
||||
adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
|
||||
adev->vcn.external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
|
||||
adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
|
||||
adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
|
||||
adev->vcn.external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
|
||||
adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
|
||||
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
|
||||
ring = &adev->vcn.ring_enc[i];
|
||||
ring = &adev->vcn.inst->ring_enc[i];
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i;
|
||||
sprintf(ring->name, "vcn_enc%d", i);
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
ring = &adev->vcn.ring_jpeg;
|
||||
ring = &adev->vcn.inst->ring_jpeg;
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
|
||||
sprintf(ring->name, "vcn_jpeg");
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
|
||||
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
|
||||
|
||||
adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
|
||||
adev->vcn.external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);
|
||||
adev->vcn.inst->external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -234,11 +241,11 @@ static int vcn_v2_0_sw_fini(void *handle)
|
|||
static int vcn_v2_0_hw_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
|
||||
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
|
||||
int i, r;
|
||||
|
||||
adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell,
|
||||
ring->doorbell_index);
|
||||
ring->doorbell_index, 0);
|
||||
|
||||
ring->sched.ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
|
@ -248,7 +255,7 @@ static int vcn_v2_0_hw_init(void *handle)
|
|||
}
|
||||
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
|
||||
ring = &adev->vcn.ring_enc[i];
|
||||
ring = &adev->vcn.inst->ring_enc[i];
|
||||
ring->sched.ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
|
@ -257,7 +264,7 @@ static int vcn_v2_0_hw_init(void *handle)
|
|||
}
|
||||
}
|
||||
|
||||
ring = &adev->vcn.ring_jpeg;
|
||||
ring = &adev->vcn.inst->ring_jpeg;
|
||||
ring->sched.ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
|
@ -283,7 +290,7 @@ done:
|
|||
static int vcn_v2_0_hw_fini(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
|
||||
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
|
||||
int i;
|
||||
|
||||
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
|
||||
|
@ -294,11 +301,11 @@ static int vcn_v2_0_hw_fini(void *handle)
|
|||
ring->sched.ready = false;
|
||||
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
|
||||
ring = &adev->vcn.ring_enc[i];
|
||||
ring = &adev->vcn.inst->ring_enc[i];
|
||||
ring->sched.ready = false;
|
||||
}
|
||||
|
||||
ring = &adev->vcn.ring_jpeg;
|
||||
ring = &adev->vcn.inst->ring_jpeg;
|
||||
ring->sched.ready = false;
|
||||
|
||||
return 0;
|
||||
|
@ -368,9 +375,9 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
|
|||
offset = 0;
|
||||
} else {
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
|
||||
lower_32_bits(adev->vcn.gpu_addr));
|
||||
lower_32_bits(adev->vcn.inst->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
|
||||
upper_32_bits(adev->vcn.gpu_addr));
|
||||
upper_32_bits(adev->vcn.inst->gpu_addr));
|
||||
offset = size;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
|
||||
AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
|
||||
|
@ -380,17 +387,17 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
|
|||
|
||||
/* cache window 1: stack */
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
|
||||
lower_32_bits(adev->vcn.gpu_addr + offset));
|
||||
lower_32_bits(adev->vcn.inst->gpu_addr + offset));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
|
||||
upper_32_bits(adev->vcn.gpu_addr + offset));
|
||||
upper_32_bits(adev->vcn.inst->gpu_addr + offset));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
|
||||
|
||||
/* cache window 2: context */
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
|
||||
lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
|
||||
lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
|
||||
upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
|
||||
upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
|
||||
|
||||
|
@ -426,10 +433,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
|
|||
} else {
|
||||
WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
|
||||
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
|
||||
lower_32_bits(adev->vcn.gpu_addr), 0, indirect);
|
||||
lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
|
||||
WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
|
||||
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
|
||||
upper_32_bits(adev->vcn.gpu_addr), 0, indirect);
|
||||
upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
|
||||
offset = size;
|
||||
WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
|
||||
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
|
||||
|
@ -447,10 +454,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
|
|||
if (!indirect) {
|
||||
WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
|
||||
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
|
||||
lower_32_bits(adev->vcn.gpu_addr + offset), 0, indirect);
|
||||
lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
|
||||
WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
|
||||
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
|
||||
upper_32_bits(adev->vcn.gpu_addr + offset), 0, indirect);
|
||||
upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
|
||||
WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
|
||||
UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
|
||||
} else {
|
||||
|
@ -467,10 +474,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
|
|||
/* cache window 2: context */
|
||||
WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
|
||||
UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
|
||||
lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
|
||||
lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
|
||||
WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
|
||||
UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
|
||||
upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
|
||||
upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
|
||||
WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
|
||||
UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
|
||||
WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
|
||||
|
@ -658,7 +665,7 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
|
|||
*/
|
||||
static int jpeg_v2_0_start(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ring *ring = &adev->vcn.ring_jpeg;
|
||||
struct amdgpu_ring *ring = &adev->vcn.inst->ring_jpeg;
|
||||
uint32_t tmp;
|
||||
int r = 0;
|
||||
|
||||
|
@ -920,7 +927,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
|
|||
|
||||
static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
|
||||
{
|
||||
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
|
||||
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
|
||||
uint32_t rb_bufsz, tmp;
|
||||
|
||||
vcn_v2_0_enable_static_power_gating(adev);
|
||||
|
@ -1046,7 +1053,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
|
|||
|
||||
static int vcn_v2_0_start(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
|
||||
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
|
||||
uint32_t rb_bufsz, tmp;
|
||||
uint32_t lmi_swap_cntl;
|
||||
int i, j, r;
|
||||
|
@ -1197,14 +1204,14 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
|
|||
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
|
||||
lower_32_bits(ring->wptr));
|
||||
|
||||
ring = &adev->vcn.ring_enc[0];
|
||||
ring = &adev->vcn.inst->ring_enc[0];
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
|
||||
|
||||
ring = &adev->vcn.ring_enc[1];
|
||||
ring = &adev->vcn.inst->ring_enc[1];
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
|
||||
|
@ -1351,14 +1358,14 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
|
|||
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
|
||||
|
||||
/* Restore */
|
||||
ring = &adev->vcn.ring_enc[0];
|
||||
ring = &adev->vcn.inst->ring_enc[0];
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
|
||||
|
||||
ring = &adev->vcn.ring_enc[1];
|
||||
ring = &adev->vcn.inst->ring_enc[1];
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
|
||||
|
@ -1480,12 +1487,14 @@ static void vcn_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
 *
 * Write a start command to the ring.
 */
static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
        amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_START << 1);
        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
        amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
}

/**

@ -1495,10 +1504,12 @@ static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
 *
 * Write a end command to the ring.
 */
static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
        amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1);
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
        amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1));
}

/**
@ -1508,14 +1519,15 @@ static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
 *
 * Write a nop command to the ring.
 */
static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
        struct amdgpu_device *adev = ring->adev;
        int i;

        WARN_ON(ring->wptr % 2 || count % 2);

        for (i = 0; i < count / 2; i++) {
                amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP_INTERNAL_OFFSET, 0));
                amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0));
                amdgpu_ring_write(ring, 0);
        }
}
@ -1528,32 +1540,33 @@ static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun
|
|||
*
|
||||
* Write a fence and a trap command to the ring.
|
||||
*/
|
||||
static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
|
||||
unsigned flags)
|
||||
void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
|
||||
unsigned flags)
|
||||
{
|
||||
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID_INTERNAL_OFFSET, 0));
|
||||
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0));
|
||||
amdgpu_ring_write(ring, seq);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
|
||||
amdgpu_ring_write(ring, addr & 0xffffffff);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
|
||||
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1);
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
|
||||
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1));
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
|
||||
amdgpu_ring_write(ring, 0);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
|
||||
amdgpu_ring_write(ring, 0);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
|
||||
|
||||
amdgpu_ring_write(ring, VCN_DEC_CMD_TRAP << 1);
|
||||
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1564,44 +1577,46 @@ static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
|
|||
*
|
||||
* Write ring commands to execute the indirect buffer
|
||||
*/
|
||||
static void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
uint32_t flags)
|
||||
void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
uint32_t flags)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0));
|
||||
amdgpu_ring_write(ring, vmid);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0));
|
||||
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0));
|
||||
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0));
|
||||
amdgpu_ring_write(ring, ib->length_dw);
|
||||
}
|
||||
|
||||
static void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring,
|
||||
uint32_t reg, uint32_t val,
|
||||
uint32_t mask)
|
||||
void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
|
||||
uint32_t val, uint32_t mask)
|
||||
{
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
|
||||
amdgpu_ring_write(ring, reg << 2);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
|
||||
amdgpu_ring_write(ring, val);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0));
|
||||
amdgpu_ring_write(ring, mask);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
|
||||
|
||||
amdgpu_ring_write(ring, VCN_DEC_CMD_REG_READ_COND_WAIT << 1);
|
||||
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1));
|
||||
}
|
||||
|
||||
static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
|
||||
unsigned vmid, uint64_t pd_addr)
|
||||
void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
|
||||
unsigned vmid, uint64_t pd_addr)
|
||||
{
|
||||
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
|
||||
uint32_t data0, data1, mask;
|
||||
|
@ -1615,18 +1630,20 @@ static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
|
|||
vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
|
||||
}
|
||||
|
||||
static void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
|
||||
uint32_t reg, uint32_t val)
|
||||
void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
|
||||
uint32_t reg, uint32_t val)
|
||||
{
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
|
||||
amdgpu_ring_write(ring, reg << 2);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
|
||||
amdgpu_ring_write(ring, val);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
|
||||
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
|
||||
|
||||
amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
|
||||
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1640,7 +1657,7 @@ static uint64_t vcn_v2_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
|
|||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
|
||||
if (ring == &adev->vcn.ring_enc[0])
|
||||
if (ring == &adev->vcn.inst->ring_enc[0])
|
||||
return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
|
||||
else
|
||||
return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
|
||||
|
@ -1657,7 +1674,7 @@ static uint64_t vcn_v2_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
|
|||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
|
||||
if (ring == &adev->vcn.ring_enc[0]) {
|
||||
if (ring == &adev->vcn.inst->ring_enc[0]) {
|
||||
if (ring->use_doorbell)
|
||||
return adev->wb.wb[ring->wptr_offs];
|
||||
else
|
||||
|
@ -1681,7 +1698,7 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
|
|||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
|
||||
if (ring == &adev->vcn.ring_enc[0]) {
|
||||
if (ring == &adev->vcn.inst->ring_enc[0]) {
|
||||
if (ring->use_doorbell) {
|
||||
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
|
||||
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
|
||||
|
@ -1706,8 +1723,8 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
|
|||
*
|
||||
* Write enc a fence and a trap command to the ring.
|
||||
*/
|
||||
static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
|
||||
u64 seq, unsigned flags)
|
||||
void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
|
||||
u64 seq, unsigned flags)
|
||||
{
|
||||
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
|
||||
|
||||
|
@ -1718,7 +1735,7 @@ static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
|
|||
amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP);
|
||||
}
|
||||
|
||||
static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring)
|
||||
void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring)
|
||||
{
|
||||
amdgpu_ring_write(ring, VCN_ENC_CMD_END);
|
||||
}
|
||||
|
@ -1731,10 +1748,10 @@ static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring)
|
|||
*
|
||||
* Write enc ring commands to execute the indirect buffer
|
||||
*/
|
||||
static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
uint32_t flags)
|
||||
void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
|
@ -1745,9 +1762,8 @@ static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
|
|||
amdgpu_ring_write(ring, ib->length_dw);
|
||||
}
|
||||
|
||||
static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
|
||||
uint32_t reg, uint32_t val,
|
||||
uint32_t mask)
|
||||
void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
|
||||
uint32_t val, uint32_t mask)
|
||||
{
|
||||
amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
|
||||
amdgpu_ring_write(ring, reg << 2);
|
||||
|
@ -1755,8 +1771,8 @@ static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
|
|||
amdgpu_ring_write(ring, val);
|
||||
}
|
||||
|
||||
static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
|
||||
unsigned int vmid, uint64_t pd_addr)
|
||||
void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
|
||||
unsigned int vmid, uint64_t pd_addr)
|
||||
{
|
||||
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
|
||||
|
||||
|
@ -1767,8 +1783,7 @@ static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
|
|||
lower_32_bits(pd_addr), 0xffffffff);
|
||||
}
|
||||
|
||||
static void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
|
||||
uint32_t reg, uint32_t val)
|
||||
void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
|
||||
{
|
||||
amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
|
||||
amdgpu_ring_write(ring, reg << 2);
|
||||
|
@ -1832,7 +1847,7 @@ static void vcn_v2_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
|
|||
*
|
||||
* Write a start command to the ring.
|
||||
*/
|
||||
static void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)
|
||||
void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)
|
||||
{
|
||||
amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
|
||||
0, 0, PACKETJ_TYPE0));
|
||||
|
@ -1850,7 +1865,7 @@ static void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)
|
|||
*
|
||||
* Write a end command to the ring.
|
||||
*/
|
||||
static void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)
|
||||
void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)
|
||||
{
|
||||
amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
|
||||
0, 0, PACKETJ_TYPE0));
|
||||
|
@ -1869,8 +1884,8 @@ static void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)
|
|||
*
|
||||
* Write a fence and a trap command to the ring.
|
||||
*/
|
||||
static void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
|
||||
unsigned flags)
|
||||
void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
|
||||
unsigned flags)
|
||||
{
|
||||
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
|
||||
|
||||
|
@ -1918,10 +1933,10 @@ static void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u6
|
|||
*
|
||||
* Write ring commands to execute the indirect buffer.
|
||||
*/
|
||||
static void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
uint32_t flags)
|
||||
void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
uint32_t flags)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
|
@ -1969,9 +1984,8 @@ static void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
|
|||
amdgpu_ring_write(ring, 0x2);
|
||||
}
|
||||
|
||||
static void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring,
|
||||
uint32_t reg, uint32_t val,
|
||||
uint32_t mask)
|
||||
void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
|
||||
uint32_t val, uint32_t mask)
|
||||
{
|
||||
uint32_t reg_offset = (reg << 2);
|
||||
|
||||
|
@ -1997,8 +2011,8 @@ static void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring,
|
|||
amdgpu_ring_write(ring, mask);
|
||||
}
|
||||
|
||||
static void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
|
||||
unsigned vmid, uint64_t pd_addr)
|
||||
void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
|
||||
unsigned vmid, uint64_t pd_addr)
|
||||
{
|
||||
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
|
||||
uint32_t data0, data1, mask;
|
||||
|
@ -2012,8 +2026,7 @@ static void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
|
|||
vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask);
|
||||
}
|
||||
|
||||
static void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring,
|
||||
uint32_t reg, uint32_t val)
|
||||
void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
|
||||
{
|
||||
uint32_t reg_offset = (reg << 2);
|
||||
|
||||
|
@ -2031,7 +2044,7 @@ static void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring,
|
|||
amdgpu_ring_write(ring, val);
|
||||
}
|
||||
|
||||
static void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -2059,16 +2072,16 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev,

        switch (entry->src_id) {
        case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
                amdgpu_fence_process(&adev->vcn.ring_dec);
                amdgpu_fence_process(&adev->vcn.inst->ring_dec);
                break;
        case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
                amdgpu_fence_process(&adev->vcn.ring_enc[0]);
                amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]);
                break;
        case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
                amdgpu_fence_process(&adev->vcn.ring_enc[1]);
                amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);
                break;
        case VCN_2_0__SRCID__JPEG_DECODE:
                amdgpu_fence_process(&adev->vcn.ring_jpeg);
                amdgpu_fence_process(&adev->vcn.inst->ring_jpeg);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",

@ -2079,6 +2092,36 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev,
        return 0;
}

static int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 4);
        if (r)
                return r;
        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
        amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
        amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);
        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
                if (tmp == 0xDEADBEEF)
                        break;
                udelay(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;

        return r;
}

static int vcn_v2_0_set_powergating_state(void *handle,
                enum amd_powergating_state state)
{
@ -2128,7 +2171,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCN_DEC,
        .align_mask = 0xf,
        .vmhub = AMDGPU_MMHUB,
        .vmhub = AMDGPU_MMHUB_0,
        .get_rptr = vcn_v2_0_dec_ring_get_rptr,
        .get_wptr = vcn_v2_0_dec_ring_get_wptr,
        .set_wptr = vcn_v2_0_dec_ring_set_wptr,

@ -2142,7 +2185,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
        .emit_ib = vcn_v2_0_dec_ring_emit_ib,
        .emit_fence = vcn_v2_0_dec_ring_emit_fence,
        .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
        .test_ring = amdgpu_vcn_dec_ring_test_ring,
        .test_ring = vcn_v2_0_dec_ring_test_ring,
        .test_ib = amdgpu_vcn_dec_ring_test_ib,
        .insert_nop = vcn_v2_0_dec_ring_insert_nop,
        .insert_start = vcn_v2_0_dec_ring_insert_start,

@ -2159,7 +2202,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCN_ENC,
        .align_mask = 0x3f,
        .nop = VCN_ENC_CMD_NO_OP,
        .vmhub = AMDGPU_MMHUB,
        .vmhub = AMDGPU_MMHUB_0,
        .get_rptr = vcn_v2_0_enc_ring_get_rptr,
        .get_wptr = vcn_v2_0_enc_ring_get_wptr,
        .set_wptr = vcn_v2_0_enc_ring_set_wptr,

@ -2188,7 +2231,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCN_JPEG,
        .align_mask = 0xf,
        .vmhub = AMDGPU_MMHUB,
        .vmhub = AMDGPU_MMHUB_0,
        .get_rptr = vcn_v2_0_jpeg_ring_get_rptr,
        .get_wptr = vcn_v2_0_jpeg_ring_get_wptr,
        .set_wptr = vcn_v2_0_jpeg_ring_set_wptr,

@ -2217,7 +2260,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = {

static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
        adev->vcn.ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs;
        adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs;
        DRM_INFO("VCN decode is enabled in VM mode\n");
}

@ -2226,14 +2269,14 @@ static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev)
        int i;

        for (i = 0; i < adev->vcn.num_enc_rings; ++i)
                adev->vcn.ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs;
                adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs;

        DRM_INFO("VCN encode is enabled in VM mode\n");
}

static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)
{
        adev->vcn.ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs;
        adev->vcn.inst->ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs;
        DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
}

@ -2244,8 +2287,8 @@ static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {

static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2;
        adev->vcn.irq.funcs = &vcn_v2_0_irq_funcs;
        adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2;
        adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs;
}

const struct amdgpu_ip_block_version vcn_v2_0_ip_block =
@ -24,6 +24,44 @@
#ifndef __VCN_V2_0_H__
#define __VCN_V2_0_H__

extern void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring);
extern void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring);
extern void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
extern void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
                                unsigned flags);
extern void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
                                struct amdgpu_ib *ib, uint32_t flags);
extern void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                uint32_t val, uint32_t mask);
extern void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                unsigned vmid, uint64_t pd_addr);
extern void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
                                uint32_t reg, uint32_t val);

extern void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring);
extern void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                                u64 seq, unsigned flags);
extern void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
                                struct amdgpu_ib *ib, uint32_t flags);
extern void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                uint32_t val, uint32_t mask);
extern void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                unsigned int vmid, uint64_t pd_addr);
extern void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);

extern void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring);
extern void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring);
extern void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
                                unsigned flags);
extern void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
                                struct amdgpu_ib *ib, uint32_t flags);
extern void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                uint32_t val, uint32_t mask);
extern void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                unsigned vmid, uint64_t pd_addr);
extern void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
extern void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count);

extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block;

#endif /* __VCN_V2_0_H__ */
File diff suppressed because it is too large
@ -0,0 +1,29 @@
/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __VCN_V2_5_H__
#define __VCN_V2_5_H__

extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block;

#endif /* __VCN_V2_5_H__ */
@ -50,7 +50,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)

        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
        if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
        if (amdgpu_sriov_vf(adev)) {
                if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
                        DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
                        return;

@ -64,7 +64,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
                ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
                ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
                                           RB_ENABLE, 1);
                if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
                if (amdgpu_sriov_vf(adev)) {
                        if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
                                                ih_rb_cntl)) {
                                DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");

@ -80,7 +80,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
                ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
                ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
                                           RB_ENABLE, 1);
                if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
                if (amdgpu_sriov_vf(adev)) {
                        if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
                                                ih_rb_cntl)) {
                                DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");

@ -106,7 +106,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)

        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
        if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
        if (amdgpu_sriov_vf(adev)) {
                if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
                        DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
                        return;
@ -125,7 +125,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
|
|||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
|
||||
RB_ENABLE, 0);
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
|
||||
ih_rb_cntl)) {
|
||||
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
|
||||
|
@ -145,7 +145,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
|
|||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
|
||||
RB_ENABLE, 0);
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
|
||||
ih_rb_cntl)) {
|
||||
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
|
||||
|
@ -219,7 +219,7 @@ static uint32_t vega10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih)
static int vega10_ih_irq_init(struct amdgpu_device *adev)
{
        struct amdgpu_ih_ring *ih;
        u32 ih_rb_cntl;
        u32 ih_rb_cntl, ih_chicken;
        int ret = 0;
        u32 tmp;

@ -238,7 +238,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
                                   !!adev->irq.msi_enabled);

        if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
        if (amdgpu_sriov_vf(adev)) {
                if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
                        DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
                        return -ETIMEDOUT;

@ -247,6 +247,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
                WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
        }

        if (adev->asic_type == CHIP_ARCTURUS &&
            adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
                if (adev->irq.ih.use_bus_addr) {
                        ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
                        ih_chicken |= 0x00000010;
                        WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
                }
        }

        /* set the writeback address whether it's enabled or not */
        WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
                     lower_32_bits(ih->wptr_addr));
@ -272,7 +281,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
|
|||
WPTR_OVERFLOW_ENABLE, 0);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
|
||||
RB_FULL_DRAIN_ENABLE, 1);
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
|
||||
ih_rb_cntl)) {
|
||||
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
|
||||
|
@ -299,7 +308,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
|
|||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
||||
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
|
||||
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
|
||||
ih_rb_cntl)) {
|
||||
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
|
||||
|
|
|
@ -50,6 +50,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
                adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
                adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
                adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
                adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i]));
                adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i]));
        }
        return 0;
}

@ -85,6 +87,10 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)
        adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3;
        adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5;
        adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7;
        adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_VEGA20_DOORBELL64_VCN0_1;
        adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCN2_3;
        adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCN4_5;
        adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCN6_7;

        adev->doorbell_index.first_non_cp = AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP;
        adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP;
@ -711,6 +711,12 @@ static int vi_asic_reset(struct amdgpu_device *adev)
        return r;
}

static enum amd_reset_method
vi_asic_reset_method(struct amdgpu_device *adev)
{
        return AMD_RESET_METHOD_LEGACY;
}

static u32 vi_get_config_memsize(struct amdgpu_device *adev)
{
        return RREG32(mmCONFIG_MEMSIZE);

@ -1023,6 +1029,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
        .read_bios_from_rom = &vi_read_bios_from_rom,
        .read_register = &vi_read_register,
        .reset = &vi_asic_reset,
        .reset_method = &vi_asic_reset_method,
        .set_vga_state = &vi_vga_set_state,
        .get_xclk = &vi_get_xclk,
        .set_uvd_clocks = &vi_set_uvd_clocks,
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -24,78 +24,6 @@
|
|||
* PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex
|
||||
*/
|
||||
|
||||
/* HW (VI) source code for CWSR trap handler */
|
||||
/* Version 18 + multiple trap handler */
|
||||
|
||||
// this performance-optimal version was originally from Seven Xu at SRDC
|
||||
|
||||
// Revison #18 --...
|
||||
/* Rev History
|
||||
** #1. Branch from gc dv. //gfxip/gfx8/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV)
|
||||
** #4. SR Memory Layout:
|
||||
** 1. VGPR-SGPR-HWREG-{LDS}
|
||||
** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern..
|
||||
** #5. Update: 1. Accurate g8sr_ts_save_d timestamp
|
||||
** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation)
|
||||
** #7. Update: 1. don't barrier if noLDS
|
||||
** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version
|
||||
** 2. Fix SQ issue by s_sleep 2
|
||||
** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last
|
||||
** 2. optimize s_buffer save by burst 16sgprs...
|
||||
** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs.
|
||||
** #11. Update 1. Add 2 more timestamp for debug version
|
||||
** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance
|
||||
** #13. Integ 1. Always use MUBUF for PV trap shader...
|
||||
** #14. Update 1. s_buffer_store soft clause...
|
||||
** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot.
|
||||
** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree
|
||||
** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part]
|
||||
** 2. PERF - Save LDS before save VGPR to cover LDS save long latency...
|
||||
** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32
|
||||
** 2. FUNC - Handle non-CWSR traps
|
||||
*/
|
||||
|
||||
var G8SR_WDMEM_HWREG_OFFSET = 0
|
||||
var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes
|
||||
|
||||
// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore.
|
||||
|
||||
var G8SR_DEBUG_TIMESTAMP = 0
|
||||
var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset
|
||||
var s_g8sr_ts_save_s = s[34:35] // save start
|
||||
var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi
|
||||
var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ
|
||||
var s_g8sr_ts_save_d = s[40:41] // save end
|
||||
var s_g8sr_ts_restore_s = s[42:43] // restore start
|
||||
var s_g8sr_ts_restore_d = s[44:45] // restore end
|
||||
|
||||
var G8SR_VGPR_SR_IN_DWX4 = 0
|
||||
var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes
|
||||
var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4
|
||||
|
||||
|
||||
/*************************************************************************/
|
||||
/* control on how to run the shader */
|
||||
/*************************************************************************/
|
||||
//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run)
|
||||
var EMU_RUN_HACK = 0
|
||||
var EMU_RUN_HACK_RESTORE_NORMAL = 0
|
||||
var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0
|
||||
var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0
|
||||
var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
|
||||
var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
|
||||
var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
|
||||
var SAVE_LDS = 1
|
||||
var WG_BASE_ADDR_LO = 0x9000a000
|
||||
var WG_BASE_ADDR_HI = 0x0
|
||||
var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem
|
||||
var CTX_SAVE_CONTROL = 0x0
|
||||
var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL
|
||||
var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run)
|
||||
var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write
|
||||
var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes
|
||||
var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing
|
||||
|
||||
/**************************************************************************/
|
||||
/* variables */
|
||||
/**************************************************************************/
|
||||
|
@ -226,16 +154,7 @@ shader main
type(CS)

    if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore
        //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
        s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC
        s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f.
        s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE
        //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE
        s_branch L_SKIP_RESTORE //NOT restore, SAVE actually
    else
        s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
    end

L_JUMP_TO_RESTORE:
    s_branch L_RESTORE //restore
@ -249,7 +168,7 @@ L_SKIP_RESTORE:
    s_cbranch_scc1 L_SAVE //this is the operation for save

    // ********* Handle non-CWSR traps *******************
    if (!EMU_RUN_HACK)

    /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */
    s_load_dwordx4 [ttmp8,ttmp9,ttmp10, ttmp11], [tma_lo,tma_hi], 0
    s_waitcnt lgkmcnt(0)

@ -268,7 +187,7 @@ L_EXCP_CASE:
    s_and_b32 ttmp1, ttmp1, 0xFFFF
    set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC)
    s_rfe_b64 [ttmp0, ttmp1]
end

// ********* End handling of non-CWSR traps *******************

/**************************************************************************/
@ -276,12 +195,6 @@ end
|
|||
/**************************************************************************/
|
||||
|
||||
L_SAVE:
|
||||
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_memrealtime s_g8sr_ts_save_s
|
||||
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
|
||||
end
|
||||
|
||||
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
|
||||
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
|
||||
|
||||
|
@ -303,16 +216,7 @@ end
|
|||
s_mov_b32 s_save_exec_hi, exec_hi
|
||||
s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
|
||||
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_memrealtime s_g8sr_ts_sq_save_msg
|
||||
s_waitcnt lgkmcnt(0)
|
||||
end
|
||||
|
||||
if (EMU_RUN_HACK)
|
||||
|
||||
else
|
||||
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
|
||||
end
|
||||
|
||||
// Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
|
||||
s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
|
||||
|
@ -321,36 +225,9 @@ end
|
|||
L_SLEEP:
|
||||
s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0
|
||||
|
||||
if (EMU_RUN_HACK)
|
||||
|
||||
else
|
||||
s_cbranch_execz L_SLEEP
|
||||
end
|
||||
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_memrealtime s_g8sr_ts_spi_wrexec
|
||||
s_waitcnt lgkmcnt(0)
|
||||
end
|
||||
|
||||
/* setup Resource Contants */
|
||||
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE))
|
||||
//calculate wd_addr using absolute thread id
|
||||
v_readlane_b32 s_save_tmp, v9, 0
|
||||
s_lshr_b32 s_save_tmp, s_save_tmp, 6
|
||||
s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE
|
||||
s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
|
||||
s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
|
||||
s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
|
||||
else
|
||||
end
|
||||
if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE))
|
||||
s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
|
||||
s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
|
||||
s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
|
||||
else
|
||||
end
|
||||
|
||||
|
||||
s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
|
||||
s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
|
||||
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
|
||||
|
@ -383,22 +260,10 @@ end
|
|||
|
||||
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
|
||||
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0
|
||||
|
||||
if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME))
|
||||
s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
|
||||
s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
|
||||
s_mov_b32 tba_lo, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO
|
||||
s_mov_b32 tba_hi, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI
|
||||
end
|
||||
|
||||
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC
|
||||
write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC
|
||||
|
@ -440,18 +305,8 @@ end
|
|||
s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
|
||||
s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
|
||||
|
||||
if (SGPR_SAVE_USE_SQC)
|
||||
s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes
|
||||
else
|
||||
s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
|
||||
end
|
||||
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
|
||||
// backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
|
||||
//s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
|
||||
|
@ -490,30 +345,14 @@ end
|
|||
s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
|
||||
s_mov_b32 exec_hi, 0xFFFFFFFF
|
||||
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
|
||||
// VGPR Allocated in 4-GPR granularity
|
||||
|
||||
if G8SR_VGPR_SR_IN_DWX4
|
||||
// the const stride for DWx4 is 4*4 bytes
|
||||
s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
|
||||
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
|
||||
|
||||
buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
|
||||
|
||||
s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
|
||||
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
|
||||
else
|
||||
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
|
||||
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
|
||||
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
|
||||
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
@ -549,64 +388,10 @@ end
|
|||
s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
|
||||
|
||||
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
s_mov_b32 m0, 0x0 //lds_offset initial value = 0
|
||||
|
||||
|
||||
var LDS_DMA_ENABLE = 0
|
||||
var UNROLL = 0
|
||||
if UNROLL==0 && LDS_DMA_ENABLE==1
|
||||
s_mov_b32 s3, 256*2
|
||||
s_nop 0
|
||||
s_nop 0
|
||||
s_nop 0
|
||||
L_SAVE_LDS_LOOP:
|
||||
//TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.???
|
||||
if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity
|
||||
buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW
|
||||
buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
|
||||
end
|
||||
|
||||
s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
|
||||
s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
|
||||
s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete?
|
||||
|
||||
elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL , has ichace miss
|
||||
// store from higest LDS address to lowest
|
||||
s_mov_b32 s3, 256*2
|
||||
s_sub_u32 m0, s_save_alloc_size, s3
|
||||
s_add_u32 s_save_mem_offset, s_save_mem_offset, m0
|
||||
s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128 trunks...
|
||||
s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from higheset addr to lowest
|
||||
s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block cost 6*4 Bytes instruction
|
||||
s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //2is the below 2 inst...//s_addc and s_setpc
|
||||
s_nop 0
|
||||
s_nop 0
|
||||
s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes
|
||||
s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved
|
||||
s_add_u32 s0, s0,s_save_alloc_size
|
||||
s_addc_u32 s1, s1, 0
|
||||
s_setpc_b64 s[0:1]
|
||||
|
||||
|
||||
for var i =0; i< 128; i++
|
||||
// be careful to make here a 64Byte aligned address, which could improve performance...
|
||||
buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW
|
||||
buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
|
||||
|
||||
if i!=127
|
||||
s_sub_u32 m0, m0, s3 // use a sgpr to shrink 2DW-inst to 1DW inst to improve performance , i.e. pack more LDS_DMA inst to one Cacheline
|
||||
s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3
|
||||
end
|
||||
end
|
||||
|
||||
else // BUFFER_STORE
|
||||
v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0
|
||||
v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid
|
||||
v_mul_i32_i24 v2, v3, 8 // tid*8
|
||||
|
@ -628,8 +413,6 @@ L_SAVE_LDS_LOOP_VECTOR:
|
|||
// restore rsrc3
|
||||
s_mov_b32 s_save_buf_rsrc3, s0
|
||||
|
||||
end
|
||||
|
||||
L_SAVE_LDS_DONE:
|
||||
|
||||
|
||||
|
@ -647,44 +430,8 @@ L_SAVE_LDS_DONE:
|
|||
s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
|
||||
s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible
|
||||
s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
|
||||
// VGPR Allocated in 4-GPR granularity
|
||||
|
||||
if G8SR_VGPR_SR_IN_DWX4
|
||||
// the const stride for DWx4 is 4*4 bytes
|
||||
s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
|
||||
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
|
||||
|
||||
s_mov_b32 m0, 4 // skip first 4 VGPRs
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size
|
||||
s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs
|
||||
|
||||
s_set_gpr_idx_on m0, 0x1 // This will change M0
|
||||
s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0
|
||||
L_SAVE_VGPR_LOOP:
|
||||
v_mov_b32 v0, v0 // v0 = v[0+m0]
|
||||
v_mov_b32 v1, v1
|
||||
v_mov_b32 v2, v2
|
||||
v_mov_b32 v3, v3
|
||||
|
||||
|
||||
buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
|
||||
s_add_u32 m0, m0, 4
|
||||
s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size
|
||||
s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
|
||||
s_set_gpr_idx_off
|
||||
L_SAVE_VGPR_LOOP_END:
|
||||
|
||||
s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
|
||||
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
|
||||
else
|
||||
// VGPR store using dw burst
|
||||
s_mov_b32 m0, 0x4 //VGPR initial index value =0
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size
|
||||
|
@ -700,52 +447,18 @@ else
|
|||
v_mov_b32 v2, v2 //v0 = v[0+m0]
|
||||
v_mov_b32 v3, v3 //v0 = v[0+m0]
|
||||
|
||||
if(USE_MTBUF_INSTEAD_OF_MUBUF)
|
||||
tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
|
||||
else
|
||||
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
|
||||
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
|
||||
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
|
||||
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
|
||||
end
|
||||
|
||||
s_add_u32 m0, m0, 4 //next vgpr index
|
||||
s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
|
||||
s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
|
||||
s_set_gpr_idx_off
|
||||
end
|
||||
|
||||
L_SAVE_VGPR_END:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* S_PGM_END_SAVED */ //FIXME graphics ONLY
|
||||
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT))
|
||||
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
|
||||
s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
|
||||
s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
|
||||
s_rfe_b64 s_save_pc_lo //Return to the main shader program
|
||||
else
|
||||
end
|
||||
|
||||
// Save Done timestamp
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_memrealtime s_g8sr_ts_save_d
|
||||
// SGPR SR memory offset : size(VGPR)
|
||||
get_vgpr_size_bytes(s_save_mem_offset)
|
||||
s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET
|
||||
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
|
||||
// Need reset rsrc2??
|
||||
s_mov_b32 m0, s_save_mem_offset
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1
|
||||
end
|
||||
|
||||
|
||||
s_branch L_END_PGM
|
||||
|
||||
|
||||
|
@ -756,27 +469,6 @@ end
|
|||
|
||||
L_RESTORE:
|
||||
/* Setup Resource Contants */
|
||||
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
|
||||
//calculate wd_addr using absolute thread id
|
||||
v_readlane_b32 s_restore_tmp, v9, 0
|
||||
s_lshr_b32 s_restore_tmp, s_restore_tmp, 6
|
||||
s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE
|
||||
s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO
|
||||
s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI
|
||||
s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL
|
||||
else
|
||||
end
|
||||
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_memrealtime s_g8sr_ts_restore_s
|
||||
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
|
||||
// tma_lo/hi are sgpr 110, 111, which will not used for 112 SGPR allocated case...
|
||||
s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0]
|
||||
s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //backup ts to ttmp0/1, sicne exec will be finally restored..
|
||||
end
|
||||
|
||||
|
||||
|
||||
s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
|
||||
s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
|
||||
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
|
||||
|
@ -818,18 +510,12 @@ end
|
|||
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow???
|
||||
|
||||
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
s_mov_b32 m0, 0x0 //lds_offset initial value = 0
|
||||
|
||||
L_RESTORE_LDS_LOOP:
|
||||
if (SAVE_LDS)
|
||||
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
|
||||
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW
|
||||
end
|
||||
s_add_u32 m0, m0, 256*2 // 128 DW
|
||||
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW
|
||||
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
|
||||
|
@ -848,40 +534,8 @@ end
|
|||
s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
|
||||
s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
|
||||
s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
if G8SR_VGPR_SR_IN_DWX4
|
||||
get_vgpr_size_bytes(s_restore_mem_offset)
|
||||
s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
|
||||
|
||||
// the const stride for DWx4 is 4*4 bytes
|
||||
s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0
|
||||
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
|
||||
|
||||
s_mov_b32 m0, s_restore_alloc_size
|
||||
s_set_gpr_idx_on m0, 0x8 // Note.. This will change m0
|
||||
|
||||
L_RESTORE_VGPR_LOOP:
|
||||
buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
|
||||
s_waitcnt vmcnt(0)
|
||||
s_sub_u32 m0, m0, 4
|
||||
v_mov_b32 v0, v0 // v[0+m0] = v0
|
||||
v_mov_b32 v1, v1
|
||||
v_mov_b32 v2, v2
|
||||
v_mov_b32 v3, v3
|
||||
s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
|
||||
s_cmp_eq_u32 m0, 0x8000
|
||||
s_cbranch_scc0 L_RESTORE_VGPR_LOOP
|
||||
s_set_gpr_idx_off
|
||||
|
||||
s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0
|
||||
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes
|
||||
|
||||
else
|
||||
// VGPR load using dw burst
|
||||
s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
|
||||
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
|
||||
|
@ -890,14 +544,10 @@ else
|
|||
s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later
|
||||
|
||||
L_RESTORE_VGPR_LOOP:
|
||||
if(USE_MTBUF_INSTEAD_OF_MUBUF)
|
||||
tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
|
||||
else
|
||||
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
|
||||
buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
|
||||
buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
|
||||
buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
|
||||
end
|
||||
s_waitcnt vmcnt(0) //ensure data ready
|
||||
v_mov_b32 v0, v0 //v[0+m0] = v0
|
||||
v_mov_b32 v1, v1
|
||||
|
@ -909,16 +559,10 @@ else
|
|||
s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete?
|
||||
s_set_gpr_idx_off
|
||||
/* VGPR restore on v0 */
|
||||
if(USE_MTBUF_INSTEAD_OF_MUBUF)
|
||||
tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
|
||||
else
|
||||
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
|
||||
buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
|
||||
buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
|
||||
buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
/* restore SGPRs */
|
||||
//////////////////////////////
|
||||
|
@ -934,16 +578,8 @@ end
|
|||
s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
|
||||
s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
|
||||
|
||||
if (SGPR_SAVE_USE_SQC)
|
||||
s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes
|
||||
else
|
||||
s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
|
||||
end
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
/* If 112 SGPRs are allocated, 4 sgprs are not used: TBA(108,109), TMA(110,111).
|
||||
However, we are safe to restore these 4 SGPRs anyway, since TBA,TMA will later be restored by HWREG
|
||||
|
@ -972,12 +608,6 @@ end
|
|||
//////////////////////////////
|
||||
L_RESTORE_HWREG:
|
||||
|
||||
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo
|
||||
s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi
|
||||
end
|
||||
|
||||
// HWREG SR memory offset : size(VGPR)+size(SGPR)
|
||||
get_vgpr_size_bytes(s_restore_mem_offset)
|
||||
get_sgpr_size_bytes(s_restore_tmp)
|
||||
|
@ -985,11 +615,7 @@ end
|
|||
|
||||
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0
|
||||
read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC
|
||||
|
@ -1006,16 +632,6 @@ end
|
|||
|
||||
s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
|
||||
|
||||
//for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
|
||||
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
|
||||
s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore)
|
||||
s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
|
||||
end
|
||||
if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL))
|
||||
s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal
|
||||
s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
|
||||
end
|
||||
|
||||
s_mov_b32 m0, s_restore_m0
|
||||
s_mov_b32 exec_lo, s_restore_exec_lo
|
||||
s_mov_b32 exec_hi, s_restore_exec_hi
|
||||
|
@ -1048,11 +664,6 @@ end
|
|||
|
||||
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
|
||||
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_memrealtime s_g8sr_ts_restore_d
|
||||
s_waitcnt lgkmcnt(0)
|
||||
end
|
||||
|
||||
// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
|
||||
s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc
|
||||
|
||||
|
|
|
@ -24,76 +24,9 @@
|
|||
* PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex
|
||||
*/
|
||||
|
||||
/* HW (GFX9) source code for CWSR trap handler */
|
||||
/* Version 18 + multiple trap handler */
|
||||
|
||||
// this performance-optimal version was originally from Seven Xu at SRDC
|
||||
|
||||
// Revision #18 --...
|
||||
/* Rev History
|
||||
** #1. Branch from gc dv. //gfxip/gfx9/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(merged, skipped - already fixed by PV)
|
||||
** #4. SR Memory Layout:
|
||||
** 1. VGPR-SGPR-HWREG-{LDS}
|
||||
** 2. tba_hi.bits.26 - reconfigured as the first-wave-in-tg bit, to defer saving LDS for a threadgroup.. performance concern..
|
||||
** #5. Update: 1. Accurate g8sr_ts_save_d timestamp
|
||||
** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation)
|
||||
** #7. Update: 1. don't barrier if noLDS
|
||||
** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version
|
||||
** 2. Fix SQ issue by s_sleep 2
|
||||
** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last
|
||||
** 2. optimize s_buffer save by bursting 16 sgprs...
|
||||
** #10. Update 1. Optimize sgpr restore by bursting 16 sgprs.
|
||||
** #11. Update 1. Add 2 more timestamp for debug version
|
||||
** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance
|
||||
** #13. Integ 1. Always use MUBUF for PV trap shader...
|
||||
** #14. Update 1. s_buffer_store soft clause...
|
||||
** #15. Update 1. PERF - scalar write with glc:0/mtype0 to allow L2 combining; a large perf improvement.
|
||||
** #16. Update 1. PERF - UNROLL LDS_DMA saves 2500 cycles in the IP tree
|
||||
** #17. Update 1. FUNC - LDS_DMA has issues with ATC; replace with ds_read/buffer_store for the save part [TODO: restore part]
|
||||
** 2. PERF - Save LDS before saving VGPRs to cover the long LDS-save latency...
|
||||
** #18. Update 1. FUNC - Implicitly restore STATUS.VCCZ, which is not writable by s_setreg_b32
|
||||
** 2. FUNC - Handle non-CWSR traps
|
||||
*/
|
||||
|
||||
var G8SR_WDMEM_HWREG_OFFSET = 0
|
||||
var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes
|
||||
|
||||
// Keep this definition the same as the app shader. These 2 timestamps are part of the app shader... They should come before any save and after restore.
|
||||
|
||||
var G8SR_DEBUG_TIMESTAMP = 0
|
||||
var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset
|
||||
var s_g8sr_ts_save_s = s[34:35] // save start
|
||||
var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi
|
||||
var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ
|
||||
var s_g8sr_ts_save_d = s[40:41] // save end
|
||||
var s_g8sr_ts_restore_s = s[42:43] // restore start
|
||||
var s_g8sr_ts_restore_d = s[44:45] // restore end
|
||||
|
||||
var G8SR_VGPR_SR_IN_DWX4 = 0
|
||||
var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes
|
||||
var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4
|
||||
|
||||
|
||||
/*************************************************************************/
|
||||
/* control on how to run the shader */
|
||||
/*************************************************************************/
|
||||
//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run)
|
||||
var EMU_RUN_HACK = 0
|
||||
var EMU_RUN_HACK_RESTORE_NORMAL = 0
|
||||
var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0
|
||||
var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0
|
||||
var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
|
||||
var SAVE_LDS = 1
|
||||
var WG_BASE_ADDR_LO = 0x9000a000
|
||||
var WG_BASE_ADDR_HI = 0x0
|
||||
var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem
|
||||
var CTX_SAVE_CONTROL = 0x0
|
||||
var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL
|
||||
var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run)
|
||||
var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write
|
||||
var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes
|
||||
var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing
|
||||
var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
|
||||
var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
|
||||
var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
|
||||
|
||||
/**************************************************************************/
|
||||
/* variables */
|
||||
|
@ -107,6 +40,7 @@ var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0
|
|||
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1
|
||||
var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3
|
||||
var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29
|
||||
var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000
|
||||
|
||||
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
|
||||
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
|
||||
|
@ -127,12 +61,15 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
|
|||
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
|
||||
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
|
||||
var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
|
||||
var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000
|
||||
|
||||
var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
|
||||
var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
|
||||
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000
|
||||
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME
|
||||
|
||||
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
|
||||
|
||||
var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
|
||||
var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
|
||||
|
||||
|
@ -197,13 +134,15 @@ var s_restore_spi_init_lo = exec_lo
|
|||
var s_restore_spi_init_hi = exec_hi
|
||||
|
||||
var s_restore_mem_offset = ttmp12
|
||||
var s_restore_accvgpr_offset = ttmp13
|
||||
var s_restore_alloc_size = ttmp3
|
||||
var s_restore_tmp = ttmp2
|
||||
var s_restore_mem_offset_save = s_restore_tmp //no conflict
|
||||
var s_restore_accvgpr_offset_save = ttmp7
|
||||
|
||||
var s_restore_m0 = s_restore_alloc_size //no conflict
|
||||
|
||||
var s_restore_mode = ttmp7
|
||||
var s_restore_mode = s_restore_accvgpr_offset_save
|
||||
|
||||
var s_restore_pc_lo = ttmp0
|
||||
var s_restore_pc_hi = ttmp1
|
||||
|
@ -226,20 +165,11 @@ var s_restore_ttmps_hi = s_restore_alloc_size //no conflict
|
|||
/* Shader Main*/
|
||||
|
||||
shader main
|
||||
asic(GFX9)
|
||||
asic(DEFAULT)
|
||||
type(CS)
|
||||
|
||||
|
||||
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore
|
||||
//FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
|
||||
s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC
|
||||
s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f.
|
||||
s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE
|
||||
//FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE
|
||||
s_branch L_SKIP_RESTORE //NOT restore, SAVE actually
|
||||
else
|
||||
s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
|
||||
end
|
||||
|
||||
L_JUMP_TO_RESTORE:
|
||||
s_branch L_RESTORE //restore
|
||||
|
@ -248,12 +178,29 @@ L_SKIP_RESTORE:
|
|||
|
||||
s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
|
||||
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save
|
||||
|
||||
if SINGLE_STEP_MISSED_WORKAROUND
|
||||
// No single step exceptions if MODE.DEBUG_EN=0.
|
||||
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
|
||||
s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND
|
||||
|
||||
// Second-level trap already handled exception if STATUS.HALT=1.
|
||||
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
|
||||
|
||||
// Prioritize single step exception over context save.
|
||||
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
|
||||
s_cbranch_scc0 L_FETCH_2ND_TRAP
|
||||
|
||||
L_NO_SINGLE_STEP_WORKAROUND:
|
||||
end
|
||||
|
||||
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
|
||||
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
|
||||
s_cbranch_scc1 L_SAVE //this is the operation for save
|
||||
|
||||
// ********* Handle non-CWSR traps *******************
|
||||
if (!EMU_RUN_HACK)
|
||||
|
||||
// Illegal instruction is a non-maskable exception which blocks context save.
|
||||
// Halt the wavefront and return from the trap.
|
||||
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
|
||||
|
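A rough C model of the dispatch performed by the hunk above may help when reading it; this is an illustration only (the names are invented and SINGLE_STEP_MISSED_WORKAROUND is assumed enabled), not driver code:

#include <stdbool.h>

/* Illustration of the branch order above, not the actual trap handler. */
enum trap_path { TRAP_SECOND_LEVEL, TRAP_SAVECTX, TRAP_NON_CWSR };

static enum trap_path pick_trap_path(bool mode_debug_en, bool status_halt,
				     bool trapsts_savectx)
{
	/* Single-step workaround: with MODE.DEBUG_EN=1 and the wave not yet
	 * halted, fetch the second-level trap first; it will halt the wave,
	 * RFE, and re-enter this handler for SAVECTX. */
	if (mode_debug_en && !status_halt)
		return TRAP_SECOND_LEVEL;

	/* TRAPSTS.SAVECTX set: take the context-save path (L_SAVE). */
	if (trapsts_savectx)
		return TRAP_SAVECTX;

	/* Otherwise handle the non-CWSR trap (e.g. an illegal instruction). */
	return TRAP_NON_CWSR;
}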
@ -330,7 +277,7 @@ L_EXCP_CASE:
|
|||
set_status_without_spi_prio(s_save_status, ttmp2)
|
||||
|
||||
s_rfe_b64 [ttmp0, ttmp1]
|
||||
end
|
||||
|
||||
// ********* End handling of non-CWSR traps *******************
|
||||
|
||||
/**************************************************************************/
|
||||
|
@ -338,12 +285,6 @@ end
|
|||
/**************************************************************************/
|
||||
|
||||
L_SAVE:
|
||||
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_memrealtime s_g8sr_ts_save_s
|
||||
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
|
||||
end
|
||||
|
||||
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
|
||||
|
||||
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
|
||||
|
@ -365,16 +306,7 @@ end
|
|||
s_mov_b32 s_save_exec_hi, exec_hi
|
||||
s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
|
||||
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_memrealtime s_g8sr_ts_sq_save_msg
|
||||
s_waitcnt lgkmcnt(0)
|
||||
end
|
||||
|
||||
if (EMU_RUN_HACK)
|
||||
|
||||
else
|
||||
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
|
||||
end
|
||||
|
||||
// Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
|
||||
s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
|
||||
|
@ -383,33 +315,7 @@ end
|
|||
L_SLEEP:
|
||||
s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD and will cause an SQ hang, since the 7th/8th wave cannot get arbitration to execute an instruction while the other waves are stuck in the sleep loop waiting for wrexec!=0
|
||||
|
||||
if (EMU_RUN_HACK)
|
||||
|
||||
else
|
||||
s_cbranch_execz L_SLEEP
|
||||
end
|
||||
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_memrealtime s_g8sr_ts_spi_wrexec
|
||||
s_waitcnt lgkmcnt(0)
|
||||
end
|
||||
|
||||
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE))
|
||||
//calculate wd_addr using absolute thread id
|
||||
v_readlane_b32 s_save_tmp, v9, 0
|
||||
s_lshr_b32 s_save_tmp, s_save_tmp, 6
|
||||
s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE
|
||||
s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
|
||||
s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
|
||||
s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
|
||||
else
|
||||
end
|
||||
if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE))
|
||||
s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
|
||||
s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
|
||||
s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
|
||||
else
|
||||
end
|
||||
|
||||
// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
|
||||
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
|
||||
|
@ -459,20 +365,10 @@ end
|
|||
|
||||
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
|
||||
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0
|
||||
|
||||
if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME))
|
||||
s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
|
||||
s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
|
||||
end
|
||||
|
||||
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC
|
||||
write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC
|
||||
|
@ -510,17 +406,9 @@ end
|
|||
s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
|
||||
s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
|
||||
|
||||
if (SGPR_SAVE_USE_SQC)
|
||||
s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes
|
||||
else
|
||||
s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
|
||||
end
|
||||
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
|
||||
// backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
|
||||
|
@ -563,30 +451,25 @@ end
|
|||
s_mov_b32 xnack_mask_lo, 0x0
|
||||
s_mov_b32 xnack_mask_hi, 0x0
|
||||
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
|
||||
// VGPR Allocated in 4-GPR granularity
|
||||
|
||||
if G8SR_VGPR_SR_IN_DWX4
|
||||
// the const stride for DWx4 is 4*4 bytes
|
||||
s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
|
||||
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
|
||||
if SAVE_AFTER_XNACK_ERROR
|
||||
check_if_tcp_store_ok()
|
||||
s_cbranch_scc1 L_SAVE_FIRST_VGPRS_WITH_TCP
|
||||
|
||||
buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
|
||||
write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
s_branch L_SAVE_LDS
|
||||
|
||||
L_SAVE_FIRST_VGPRS_WITH_TCP:
|
||||
end
|
||||
|
||||
s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
|
||||
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
|
||||
else
|
||||
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
|
||||
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
|
||||
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
|
||||
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
@ -621,66 +504,34 @@ end
|
|||
s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
|
||||
|
||||
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
s_mov_b32 m0, 0x0 //lds_offset initial value = 0
|
||||
|
||||
|
||||
var LDS_DMA_ENABLE = 0
|
||||
var UNROLL = 0
|
||||
if UNROLL==0 && LDS_DMA_ENABLE==1
|
||||
s_mov_b32 s3, 256*2
|
||||
s_nop 0
|
||||
s_nop 0
|
||||
s_nop 0
|
||||
L_SAVE_LDS_LOOP:
|
||||
//TODO: it looks like the 2 buffer_store/load clauses for s/r will hurt performance???
|
||||
if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity
|
||||
buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW
|
||||
buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
|
||||
end
|
||||
|
||||
s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
|
||||
s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
|
||||
s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete?
|
||||
|
||||
elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROLL, has icache misses
|
||||
// store from highest LDS address to lowest
|
||||
s_mov_b32 s3, 256*2
|
||||
s_sub_u32 m0, s_save_alloc_size, s3
|
||||
s_add_u32 s_save_mem_offset, s_save_mem_offset, m0
|
||||
s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128-DW chunks...
|
||||
s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from highest addr to lowest
|
||||
s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block costs 6*4 bytes of instructions
|
||||
s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //3*4 covers the 3 instructions after s_getpc (s_add, s_addc and s_setpc)
|
||||
s_nop 0
|
||||
s_nop 0
|
||||
s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes
|
||||
s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved
|
||||
s_add_u32 s0, s0,s_save_alloc_size
|
||||
s_addc_u32 s1, s1, 0
|
||||
s_setpc_b64 s[0:1]
|
||||
|
||||
|
||||
for var i =0; i< 128; i++
|
||||
// be careful to make this a 64-byte-aligned address, which could improve performance...
|
||||
buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW
|
||||
buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
|
||||
|
||||
if i!=127
|
||||
s_sub_u32 m0, m0, s3 // use an sgpr to shrink a 2DW inst to a 1DW inst to improve performance, i.e. pack more LDS_DMA instructions into one cacheline
|
||||
s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3
|
||||
end
|
||||
end
|
||||
|
||||
else // BUFFER_STORE
|
||||
v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0
|
||||
v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid
|
||||
|
||||
if SAVE_AFTER_XNACK_ERROR
|
||||
check_if_tcp_store_ok()
|
||||
s_cbranch_scc1 L_SAVE_LDS_WITH_TCP
|
||||
|
||||
v_lshlrev_b32 v2, 2, v3
|
||||
L_SAVE_LDS_LOOP_SQC:
|
||||
ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40
|
||||
s_waitcnt lgkmcnt(0)
|
||||
|
||||
write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
|
||||
v_add_u32 v2, 0x200, v2
|
||||
v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
|
||||
s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC
|
||||
|
||||
s_branch L_SAVE_LDS_DONE
|
||||
|
||||
L_SAVE_LDS_WITH_TCP:
|
||||
end
|
||||
|
||||
v_mul_i32_i24 v2, v3, 8 // tid*8
|
||||
v_mov_b32 v3, 256*2
|
||||
s_mov_b32 m0, 0x10000
|
||||
|
@ -701,8 +552,6 @@ L_SAVE_LDS_LOOP_VECTOR:
|
|||
// restore rsrc3
|
||||
s_mov_b32 s_save_buf_rsrc3, s0
|
||||
|
||||
end
|
||||
|
||||
L_SAVE_LDS_DONE:
|
||||
|
||||
|
||||
|
@ -720,44 +569,9 @@ L_SAVE_LDS_DONE:
|
|||
s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
|
||||
s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible
|
||||
s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
|
||||
// VGPR Allocated in 4-GPR granularity
|
||||
|
||||
if G8SR_VGPR_SR_IN_DWX4
|
||||
// the const stride for DWx4 is 4*4 bytes
|
||||
s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
|
||||
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
|
||||
|
||||
s_mov_b32 m0, 4 // skip first 4 VGPRs
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size
|
||||
s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs
|
||||
|
||||
s_set_gpr_idx_on m0, 0x1 // This will change M0
|
||||
s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0
|
||||
L_SAVE_VGPR_LOOP:
|
||||
v_mov_b32 v0, v0 // v0 = v[0+m0]
|
||||
v_mov_b32 v1, v1
|
||||
v_mov_b32 v2, v2
|
||||
v_mov_b32 v3, v3
|
||||
|
||||
|
||||
buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
|
||||
s_add_u32 m0, m0, 4
|
||||
s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size
|
||||
s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
|
||||
s_set_gpr_idx_off
|
||||
L_SAVE_VGPR_LOOP_END:
|
||||
|
||||
s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
|
||||
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
|
||||
else
|
||||
// VGPR store using dw burst
|
||||
s_mov_b32 m0, 0x4 //VGPR initial index value = 4
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size
|
||||
|
@ -767,57 +581,82 @@ else
|
|||
s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
|
||||
s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later
|
||||
|
||||
if SAVE_AFTER_XNACK_ERROR
|
||||
check_if_tcp_store_ok()
|
||||
s_cbranch_scc1 L_SAVE_VGPR_LOOP
|
||||
|
||||
L_SAVE_VGPR_LOOP_SQC:
|
||||
write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
|
||||
s_add_u32 m0, m0, 4
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size
|
||||
s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC
|
||||
|
||||
s_set_gpr_idx_off
|
||||
s_branch L_SAVE_VGPR_END
|
||||
end
|
||||
|
||||
L_SAVE_VGPR_LOOP:
|
||||
v_mov_b32 v0, v0 //v0 = v[0+m0]
|
||||
v_mov_b32 v1, v1 //v0 = v[0+m0]
|
||||
v_mov_b32 v2, v2 //v0 = v[0+m0]
|
||||
v_mov_b32 v3, v3 //v0 = v[0+m0]
|
||||
|
||||
if(USE_MTBUF_INSTEAD_OF_MUBUF)
|
||||
tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
|
||||
else
|
||||
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
|
||||
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
|
||||
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
|
||||
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
|
||||
end
|
||||
|
||||
s_add_u32 m0, m0, 4 //next vgpr index
|
||||
s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
|
||||
s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
|
||||
s_set_gpr_idx_off
|
||||
end
|
||||
|
||||
L_SAVE_VGPR_END:
|
||||
|
||||
if ASIC_TARGET_ARCTURUS
|
||||
// Save ACC VGPRs
|
||||
s_mov_b32 m0, 0x0 //VGPR initial index value =0
|
||||
s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
|
||||
|
||||
if SAVE_AFTER_XNACK_ERROR
|
||||
check_if_tcp_store_ok()
|
||||
s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP
|
||||
|
||||
|
||||
|
||||
|
||||
/* S_PGM_END_SAVED */ //FIXME graphics ONLY
|
||||
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT))
|
||||
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
|
||||
s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
|
||||
s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
|
||||
s_rfe_b64 s_save_pc_lo //Return to the main shader program
|
||||
else
|
||||
L_SAVE_ACCVGPR_LOOP_SQC:
|
||||
for var vgpr = 0; vgpr < 4; ++ vgpr
|
||||
v_accvgpr_read v[vgpr], acc[vgpr] // v[N] = acc[N+m0]
|
||||
end
|
||||
|
||||
// Save Done timestamp
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_memrealtime s_g8sr_ts_save_d
|
||||
// SGPR SR memory offset : size(VGPR)
|
||||
get_vgpr_size_bytes(s_save_mem_offset)
|
||||
s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET
|
||||
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
|
||||
// Need reset rsrc2??
|
||||
s_mov_b32 m0, s_save_mem_offset
|
||||
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1
|
||||
write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
|
||||
s_add_u32 m0, m0, 4
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size
|
||||
s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP_SQC
|
||||
|
||||
s_set_gpr_idx_off
|
||||
s_branch L_SAVE_ACCVGPR_END
|
||||
end
|
||||
|
||||
L_SAVE_ACCVGPR_LOOP:
|
||||
for var vgpr = 0; vgpr < 4; ++ vgpr
|
||||
v_accvgpr_read v[vgpr], acc[vgpr] // v[N] = acc[N+m0]
|
||||
end
|
||||
|
||||
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
|
||||
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
|
||||
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
|
||||
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
|
||||
|
||||
s_add_u32 m0, m0, 4
|
||||
s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4
|
||||
s_cmp_lt_u32 m0, s_save_alloc_size
|
||||
s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP
|
||||
s_set_gpr_idx_off
|
||||
|
||||
L_SAVE_ACCVGPR_END:
|
||||
end
|
||||
|
||||
s_branch L_END_PGM
|
||||
|
||||
|
@ -829,27 +668,6 @@ end
|
|||
|
||||
L_RESTORE:
|
||||
/* Setup Resource Contants */
|
||||
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
|
||||
//calculate wd_addr using absolute thread id
|
||||
v_readlane_b32 s_restore_tmp, v9, 0
|
||||
s_lshr_b32 s_restore_tmp, s_restore_tmp, 6
|
||||
s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE
|
||||
s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO
|
||||
s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI
|
||||
s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL
|
||||
else
|
||||
end
|
||||
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_memrealtime s_g8sr_ts_restore_s
|
||||
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
|
||||
// tma_lo/hi are sgpr 110, 111, which will not be used in the 112-SGPR allocation case...
|
||||
s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0]
|
||||
s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //back up ts to ttmp0/1, since exec will be restored at the very end..
|
||||
end
|
||||
|
||||
|
||||
|
||||
s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
|
||||
s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
|
||||
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
|
||||
|
@ -891,18 +709,12 @@ end
|
|||
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow???
|
||||
|
||||
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
s_mov_b32 m0, 0x0 //lds_offset initial value = 0
|
||||
|
||||
L_RESTORE_LDS_LOOP:
|
||||
if (SAVE_LDS)
|
||||
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
|
||||
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW
|
||||
end
|
||||
s_add_u32 m0, m0, 256*2 // 128 DW
|
||||
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW
|
||||
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
|
||||
|
@ -921,56 +733,43 @@ end
|
|||
s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
|
||||
s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
|
||||
s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
|
||||
if ASIC_TARGET_ARCTURUS
|
||||
s_mov_b32 s_restore_accvgpr_offset, s_restore_buf_rsrc2 //ACC VGPRs at end of VGPRs
|
||||
end
|
||||
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
if G8SR_VGPR_SR_IN_DWX4
|
||||
get_vgpr_size_bytes(s_restore_mem_offset)
|
||||
s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
|
||||
|
||||
// the const stride for DWx4 is 4*4 bytes
|
||||
s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0
|
||||
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
|
||||
|
||||
s_mov_b32 m0, s_restore_alloc_size
|
||||
s_set_gpr_idx_on m0, 0x8 // Note.. This will change m0
|
||||
|
||||
L_RESTORE_VGPR_LOOP:
|
||||
buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
|
||||
s_waitcnt vmcnt(0)
|
||||
s_sub_u32 m0, m0, 4
|
||||
v_mov_b32 v0, v0 // v[0+m0] = v0
|
||||
v_mov_b32 v1, v1
|
||||
v_mov_b32 v2, v2
|
||||
v_mov_b32 v3, v3
|
||||
s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
|
||||
s_cmp_eq_u32 m0, 0x8000
|
||||
s_cbranch_scc0 L_RESTORE_VGPR_LOOP
|
||||
s_set_gpr_idx_off
|
||||
|
||||
s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0
|
||||
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes
|
||||
|
||||
else
|
||||
// VGPR load using dw burst
|
||||
s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
|
||||
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
|
||||
if ASIC_TARGET_ARCTURUS
|
||||
s_mov_b32 s_restore_accvgpr_offset_save, s_restore_accvgpr_offset
|
||||
s_add_u32 s_restore_accvgpr_offset, s_restore_accvgpr_offset, 256*4
|
||||
end
|
||||
s_mov_b32 m0, 4 //VGPR initial index value = 4
|
||||
s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
|
||||
s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later
|
||||
|
||||
L_RESTORE_VGPR_LOOP:
|
||||
if(USE_MTBUF_INSTEAD_OF_MUBUF)
|
||||
tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
|
||||
else
|
||||
|
||||
if ASIC_TARGET_ARCTURUS
|
||||
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1
|
||||
buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 offset:256
|
||||
buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 offset:256*2
|
||||
buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 offset:256*3
|
||||
s_add_u32 s_restore_accvgpr_offset, s_restore_accvgpr_offset, 256*4
|
||||
s_waitcnt vmcnt(0)
|
||||
|
||||
for var vgpr = 0; vgpr < 4; ++ vgpr
|
||||
v_accvgpr_write acc[vgpr], v[vgpr]
|
||||
end
|
||||
end
|
||||
|
||||
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
|
||||
buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
|
||||
buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
|
||||
buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
|
||||
end
|
||||
s_waitcnt vmcnt(0) //ensure data ready
|
||||
v_mov_b32 v0, v0 //v[0+m0] = v0
|
||||
v_mov_b32 v1, v1
|
||||
|
@ -982,16 +781,22 @@ else
|
|||
s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete?
|
||||
s_set_gpr_idx_off
|
||||
/* VGPR restore on v0 */
|
||||
if(USE_MTBUF_INSTEAD_OF_MUBUF)
|
||||
tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
|
||||
else
|
||||
if ASIC_TARGET_ARCTURUS
|
||||
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1
|
||||
buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 offset:256
|
||||
buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 offset:256*2
|
||||
buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 offset:256*3
|
||||
s_waitcnt vmcnt(0)
|
||||
|
||||
for var vgpr = 0; vgpr < 4; ++ vgpr
|
||||
v_accvgpr_write acc[vgpr], v[vgpr]
|
||||
end
|
||||
end
|
||||
|
||||
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
|
||||
buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
|
||||
buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
|
||||
buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
/* restore SGPRs */
|
||||
//////////////////////////////
|
||||
|
@ -1007,16 +812,8 @@ end
|
|||
s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
|
||||
s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
|
||||
|
||||
if (SGPR_SAVE_USE_SQC)
|
||||
s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes
|
||||
else
|
||||
s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
|
||||
end
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
s_mov_b32 m0, s_restore_alloc_size
|
||||
|
||||
|
@ -1044,11 +841,6 @@ end
|
|||
L_RESTORE_HWREG:
|
||||
|
||||
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo
|
||||
s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi
|
||||
end
|
||||
|
||||
// HWREG SR memory offset : size(VGPR)+size(SGPR)
|
||||
get_vgpr_size_bytes(s_restore_mem_offset)
|
||||
get_sgpr_size_bytes(s_restore_tmp)
|
||||
|
@ -1056,11 +848,7 @@ end
|
|||
|
||||
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
|
||||
if (SWIZZLE_EN)
|
||||
s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
|
||||
else
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
end
|
||||
|
||||
read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0
|
||||
read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC
|
||||
|
@ -1075,16 +863,6 @@ end
|
|||
|
||||
s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
|
||||
|
||||
//for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
|
||||
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
|
||||
s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore)
|
||||
s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
|
||||
end
|
||||
if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL))
|
||||
s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal
|
||||
s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
|
||||
end
|
||||
|
||||
s_mov_b32 m0, s_restore_m0
|
||||
s_mov_b32 exec_lo, s_restore_exec_lo
|
||||
s_mov_b32 exec_hi, s_restore_exec_hi
|
||||
|
@ -1131,11 +909,6 @@ end
|
|||
|
||||
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
|
||||
|
||||
if G8SR_DEBUG_TIMESTAMP
|
||||
s_memrealtime s_g8sr_ts_restore_d
|
||||
s_waitcnt lgkmcnt(0)
|
||||
end
|
||||
|
||||
// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
|
||||
s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc
|
||||
|
||||
|
@ -1190,7 +963,39 @@ function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
|
|||
s_sub_u32 s_mem_offset, s_mem_offset, 4*16
|
||||
end
|
||||
|
||||
function check_if_tcp_store_ok
|
||||
// If STATUS.ALLOW_REPLAY=0 and TRAPSTS.XNACK_ERROR=1 then TCP stores will fail.
|
||||
s_and_b32 s_save_tmp, s_save_status, SQ_WAVE_STATUS_ALLOW_REPLAY_MASK
|
||||
s_cbranch_scc1 L_TCP_STORE_CHECK_DONE
|
||||
|
||||
s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS)
|
||||
s_andn2_b32 s_save_tmp, SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK, s_save_tmp
|
||||
|
||||
L_TCP_STORE_CHECK_DONE:
|
||||
end
|
||||
|
||||
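The check_if_tcp_store_ok helper above boils down to a small predicate. A hedged C restatement follows; the mask values are copied from the var definitions earlier in this file, while the function name is invented for illustration:

#include <stdbool.h>
#include <stdint.h>

#define SQ_WAVE_STATUS_ALLOW_REPLAY_MASK  0x400000
#define SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK  0x10000000

/* "ok" (SCC=1 above) when replay is allowed, or when no XNACK error is
 * pending; otherwise TCP stores would fail and the SQC path is used. */
static bool tcp_store_ok(uint32_t status, uint32_t trapsts)
{
	if (status & SQ_WAVE_STATUS_ALLOW_REPLAY_MASK)
		return true;
	return !(trapsts & SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK);
}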
function write_vgpr_to_mem_with_sqc(v, s_rsrc, s_mem_offset)
|
||||
s_mov_b32 s4, 0
|
||||
|
||||
L_WRITE_VGPR_LANE_LOOP:
|
||||
for var lane = 0; lane < 4; ++ lane
|
||||
v_readlane_b32 s[lane], v, s4
|
||||
s_add_u32 s4, s4, 1
|
||||
end
|
||||
|
||||
s_buffer_store_dwordx4 s[0:3], s_rsrc, s_mem_offset glc:1
|
||||
ack_sqc_store_workaround()
|
||||
|
||||
s_add_u32 s_mem_offset, s_mem_offset, 0x10
|
||||
s_cmp_eq_u32 s4, 0x40
|
||||
s_cbranch_scc0 L_WRITE_VGPR_LANE_LOOP
|
||||
end
|
||||
|
||||
function write_vgprs_to_mem_with_sqc(v, n_vgprs, s_rsrc, s_mem_offset)
|
||||
for var vgpr = 0; vgpr < n_vgprs; ++ vgpr
|
||||
write_vgpr_to_mem_with_sqc(v[vgpr], s_rsrc, s_mem_offset)
|
||||
end
|
||||
end
|
||||
|
||||
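write_vgpr_to_mem_with_sqc above copies one VGPR out to memory 4 lanes at a time using scalar stores. Below is a rough C model of that loop, with invented names and a plain buffer standing in for the buffer resource descriptor:

#include <stdint.h>
#include <string.h>

/* 64 lanes per wave; one s_buffer_store_dwordx4 covers 4 lanes (0x10 bytes). */
static void write_vgpr_with_sqc(const uint32_t lanes[64], uint32_t *dst)
{
	for (unsigned int lane = 0; lane < 64; lane += 4) {
		memcpy(dst, &lanes[lane], 4 * sizeof(uint32_t));
		dst += 4;
	}
}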
function get_lds_size_bytes(s_lds_size_byte)
|
||||
// SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW
|
||||
|
@ -1202,6 +1007,10 @@ function get_vgpr_size_bytes(s_vgpr_size_byte)
|
|||
s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size
|
||||
s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
|
||||
s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible
|
||||
|
||||
if ASIC_TARGET_ARCTURUS
|
||||
s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, 1 // Double size for ACC VGPRs
|
||||
end
|
||||
end
|
||||
|
||||
function get_sgpr_size_bytes(s_sgpr_size_byte)
|
||||
|
|
|
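The size helpers above encode the save-area layout (VGPR block, then SGPR block, then HWREG block). A small C sketch of the same arithmetic, using the granularities stated in the comments; the function names are invented, and the Arcturus doubling corresponds to the ACC-VGPR handling added in this patch:

#include <stdbool.h>
#include <stddef.h>

/* (vgpr_size + 1) * 4 VGPRs * 64 lanes * 4 bytes, i.e. << (2 + 8). */
static size_t vgpr_save_bytes(unsigned int vgpr_size_field, bool is_arcturus)
{
	size_t bytes = (size_t)(vgpr_size_field + 1) << (2 + 8);

	if (is_arcturus)
		bytes *= 2;	/* ACC VGPRs are saved after the regular VGPRs */
	return bytes;
}

/* (sgpr_size + 1) * 16 SGPRs * 4 bytes. */
static size_t sgpr_save_bytes(unsigned int sgpr_size_field)
{
	return (size_t)(sgpr_size_field + 1) * 16 * 4;
}

/* "HWREG SR memory offset : size(VGPR)+size(SGPR)", as noted above. */
static size_t hwreg_save_offset(unsigned int vgpr_field, unsigned int sgpr_field,
				bool is_arcturus)
{
	return vgpr_save_bytes(vgpr_field, is_arcturus) +
	       sgpr_save_bytes(sgpr_field);
}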
@ -662,6 +662,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
|
|||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_ARCTURUS:
|
||||
pcache_info = vega10_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
|
||||
break;
|
||||
|
@ -788,7 +789,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
|
|||
* is put in the code to ensure we don't overwrite.
|
||||
*/
|
||||
#define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE)
|
||||
#define VCRAT_SIZE_FOR_GPU (3 * PAGE_SIZE)
|
||||
#define VCRAT_SIZE_FOR_GPU (4 * PAGE_SIZE)
|
||||
|
||||
/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
|
||||
*
|
||||
|
|
|
@ -317,6 +317,23 @@ static const struct kfd_device_info vega20_device_info = {
|
|||
.num_sdma_queues_per_engine = 8,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info arcturus_device_info = {
|
||||
.asic_family = CHIP_ARCTURUS,
|
||||
.max_pasid_bits = 16,
|
||||
.max_no_of_hqd = 24,
|
||||
.doorbell_size = 8,
|
||||
.ih_ring_entry_size = 8 * sizeof(uint32_t),
|
||||
.event_interrupt_class = &event_interrupt_class_v9,
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 6,
|
||||
.num_sdma_queues_per_engine = 8,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info navi10_device_info = {
|
||||
.asic_family = CHIP_NAVI10,
|
||||
.max_pasid_bits = 16,
|
||||
|
@ -452,7 +469,9 @@ static const struct kfd_deviceid supported_devices[] = {
|
|||
{ 0x66a4, &vega20_device_info }, /* Vega20 */
|
||||
{ 0x66a7, &vega20_device_info }, /* Vega20 */
|
||||
{ 0x66af, &vega20_device_info }, /* Vega20 */
|
||||
/* Navi10 */
|
||||
{ 0x738C, &arcturus_device_info }, /* Arcturus */
|
||||
{ 0x7388, &arcturus_device_info }, /* Arcturus */
|
||||
{ 0x738E, &arcturus_device_info }, /* Arcturus */
|
||||
{ 0x7310, &navi10_device_info }, /* Navi10 */
|
||||
{ 0x7312, &navi10_device_info }, /* Navi10 */
|
||||
{ 0x7318, &navi10_device_info }, /* Navi10 */
|
||||
|
@ -536,6 +555,10 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
|
|||
BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
|
||||
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
|
||||
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
|
||||
} else if (kfd->device_info->asic_family == CHIP_ARCTURUS) {
|
||||
BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
|
||||
kfd->cwsr_isa = cwsr_trap_arcturus_hex;
|
||||
kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
|
||||
} else if (kfd->device_info->asic_family < CHIP_NAVI10) {
|
||||
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
|
||||
kfd->cwsr_isa = cwsr_trap_gfx9_hex;
|
||||
|
|
|
@ -880,8 +880,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
|
|||
}
|
||||
|
||||
dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
|
||||
dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
|
||||
dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
|
||||
dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
|
||||
dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1019,8 +1019,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
|
|||
dqm->sdma_queue_count = 0;
|
||||
dqm->xgmi_sdma_queue_count = 0;
|
||||
dqm->active_runlist = false;
|
||||
dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
|
||||
dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
|
||||
dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
|
||||
dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
|
||||
|
||||
INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
|
||||
|
||||
|
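The two hunks above replace (1ULL << n) - 1 with ~0ULL >> (64 - n) when initializing the SDMA queue bitmaps. The right-shift form stays well defined even if the queue count reaches 64, whereas shifting a 64-bit value left by 64 is undefined behaviour in C; that appears to be the motivation. A minimal sketch, illustration only and not driver code:

#include <stdint.h>
#include <stdio.h>

/* All-ones bitmap for n queues, well defined for 1 <= n <= 64.
 * The old expression, (1ULL << n) - 1, shifts by 64 when n == 64. */
static uint64_t queue_bitmap(unsigned int n)
{
	return ~0ULL >> (64 - n);
}

int main(void)
{
	printf("n=8  -> %#llx\n", (unsigned long long)queue_bitmap(8));
	printf("n=64 -> %#llx\n", (unsigned long long)queue_bitmap(64));
	return 0;
}

Note that the new form still assumes n >= 1; a queue count of zero would again shift by 64.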
@ -1786,6 +1786,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
|||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_RAVEN:
|
||||
case CHIP_ARCTURUS:
|
||||
device_queue_manager_init_v9(&dqm->asic_ops);
|
||||
break;
|
||||
case CHIP_NAVI10:
|
||||
|
|
|
@ -405,6 +405,7 @@ int kfd_init_apertures(struct kfd_process *process)
|
|||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_RAVEN:
|
||||
case CHIP_ARCTURUS:
|
||||
case CHIP_NAVI10:
|
||||
kfd_init_apertures_v9(pdd, id);
|
||||
break;
|
||||
|
|
|
@ -80,6 +80,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
|
|||
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
|
||||
source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
|
||||
client_id == SOC15_IH_CLIENTID_VMC ||
|
||||
client_id == SOC15_IH_CLIENTID_VMC1 ||
|
||||
client_id == SOC15_IH_CLIENTID_UTCL2;
|
||||
}
|
||||
|
||||
|
@ -104,6 +105,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
|
|||
else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
|
||||
kfd_signal_hw_exception_event(pasid);
|
||||
else if (client_id == SOC15_IH_CLIENTID_VMC ||
|
||||
client_id == SOC15_IH_CLIENTID_VMC1 ||
|
||||
client_id == SOC15_IH_CLIENTID_UTCL2) {
|
||||
struct kfd_vm_fault_info info = {0};
|
||||
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
|
||||
|
|
Some files were not shown because too many files have changed in this diff.