Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.5-2023-06-02:

amdgpu:
- SR-IOV fixes
- Warning fixes
- Misc code cleanups and spelling fixes
- DCN 3.2 updates
- Improved DC FAMS support for better power management
- Improved DC SubVP support for better power management
- DCN 3.1.x fixes
- Max IB size query
- DC GPU reset fixes
- RAS updates
- DCN 3.0.x fixes
- S/G display fixes
- CP shadow buffer support
- Implement connector force callback
- Z8 power improvements
- PSP 13.0.10 vbflash support
- Mode2 reset fixes
- Store MQDs in VRAM to improve queue switch latency
- VCN 3.x fixes
- JPEG 3.x fixes
- Enable DC_FP on LoongArch
- GFXOFF fixes
- GC 9.4.3 partition support
- SDMA 4.4.2 partition support
- VCN/JPEG 4.0.3 partition support
- VCN 4.0.3 updates
- NBIO 7.9 updates
- GC 9.4.3 updates
- Take NUMA into account when allocating memory
- Handle NUMA for partitions
- SMU 13.0.6 updates
- GC 9.4.3 RAS updates
- Stop including unused swiotlb.h
- SMU 13.0.7 fixes
- Fix clock output ordering on some APUs
- Clean up DC FPGA code
- GFX9 preemption fixes
- Misc irq fixes
- S0ix fixes
- Add new DRM_AMDGPU_WERROR config parameter to help with CI
- PCIe fix for RDNA2
- kdoc fixes
- Documentation updates

amdkfd:
- Query TTM mem limit rather than hardcoding it
- GC 9.4.3 partition support
- Handle NUMA for partitions

radeon:
- Fix possible double free
- Stop including unused swiotlb.h
- Fix possible division by zero

ttm:
- Add query for TTM mem limit
- Add NUMA awareness to pools
- Export ttm_pool_fini()

UAPI:
- Add new ctx query flag to better handle GPU resets
  Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22290
- Add new interface to query and set shadow buffer for RDNA3
  Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986
- Add new INFO query for max IB size
  Proposed userspace: https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/commits/ib-rejection-v3

amd-drm-next-6.5-2023-06-09:

amdgpu:
- S0ix fixes
- Initial SMU13 Overdrive support
- kdoc fixes
- Misc code cleanups
- Flexible array fixes
- Display OTG fixes
- SMU 13.0.6 updates
- Revert some broken clock counter updates
- Misc display fixes
- GFX9 preemption fixes
- Add support for newer EEPROM bad page table format
- Add missing radeon secondary id
- Add support for new colorspace KMS API
- CSA fix
- Stable pstate fixes for APUs
- Make vbl interface admin only
- Handle PCI accelerator class

amdkfd:
- Add debugger support for gdb

radeon:
- Fix possible UAF

drm:
- Add Colorspace functionality

UAPI:
- Add debugger interface for enabling gdb
  Proposed userspace: https://github.com/ROCm-Developer-Tools/ROCdbgapi/tree/wip-dbgapi
- Add KMS colorspace API
  Discussion: https://lists.freedesktop.org/archives/dri-devel/2023-June/408128.html

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230609174817.7764-1-alexander.deucher@amd.com
commit 901bdf5ea1
@@ -5,6 +5,8 @@ Ryzen 4000 series, RENOIR, DCN 2.1, 9.3, VCN 2.2, 4.1.2, 11.0.3
Ryzen 3000 series / AMD Ryzen Embedded V1*/R1* with Radeon Vega Gfx, RAVEN2, DCN 1.0, 9.2.2, VCN 1.0.1, 4.1.1, 10.0.1
SteamDeck, VANGOGH, DCN 3.0.1, 10.3.1, VCN 3.1.0, 5.2.1, 11.5.0
Ryzen 5000 series / Ryzen 7x30 series, GREEN SARDINE / Cezanne / Barcelo / Barcelo-R, DCN 2.1, 9.3, VCN 2.2, 4.1.1, 12.0.1
Ryzen 6000 series / Ryzen 7x35 series, YELLOW CARP / Rembrandt / Rembrandt+, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3
Ryzen 6000 series / Ryzen 7x35 series / Ryzen 7x36 series, YELLOW CARP / Rembrandt / Rembrandt-R, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3
Ryzen 7000 series (AM5), Raphael, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x45 series (FL1), / Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8
Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11

@@ -140,6 +140,7 @@ obj-$(CONFIG_DRM_TTM) += ttm/
obj-$(CONFIG_DRM_SCHED) += scheduler/
obj-$(CONFIG_DRM_RADEON)+= radeon/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
obj-$(CONFIG_DRM_I915) += i915/
obj-$(CONFIG_DRM_KMB_DISPLAY) += kmb/
obj-$(CONFIG_DRM_MGAG200) += mgag200/

@@ -69,6 +69,16 @@ config DRM_AMDGPU_USERPTR
This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
isn't already selected to enabled full userptr support.

config DRM_AMDGPU_WERROR
bool "Force the compiler to throw an error instead of a warning when compiling"
depends on DRM_AMDGPU
depends on EXPERT
depends on !COMPILE_TEST
default n
help
Add -Werror to the build flags for amdgpu.ko.
Only enable this if you are warning code for amdgpu.ko.

source "drivers/gpu/drm/amd/acp/Kconfig"
source "drivers/gpu/drm/amd/display/Kconfig"
source "drivers/gpu/drm/amd/amdkfd/Kconfig"

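The DRM_AMDGPU_WERROR option added above is aimed at CI builds. A minimal sketch of a config fragment that satisfies its dependencies (assuming amdgpu is built as a module; the values here are illustrative, not part of the patch):

# Sketch: promote amdgpu build warnings to errors for CI coverage.
# DRM_AMDGPU_WERROR depends on DRM_AMDGPU and EXPERT and is excluded
# under COMPILE_TEST, per the Kconfig entry above.
CONFIG_EXPERT=y
CONFIG_DRM_AMDGPU=m
CONFIG_DRM_AMDGPU_WERROR=y
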
@@ -39,6 +39,15 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
-I$(FULL_AMD_PATH)/amdkfd

subdir-ccflags-y := -Wextra
subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
subdir-ccflags-y += -Wno-unused-parameter
subdir-ccflags-y += -Wno-type-limits
subdir-ccflags-y += -Wno-sign-compare
subdir-ccflags-y += -Wno-missing-field-initializers
subdir-ccflags-y += -Wno-override-init
subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror

amdgpu-y := amdgpu_drv.o

# add KMS driver

@@ -60,7 +69,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
amdgpu_ring_mux.o
amdgpu_ring_mux.o amdgpu_xcp.o

amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o

@@ -78,7 +87,7 @@ amdgpu-y += \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
nbio_v7_9.o
nbio_v7_9.o aqua_vanjaram_reg_init.o

# add DF block
amdgpu-y += \

@@ -183,12 +192,14 @@ amdgpu-y += \
vcn_v2_5.o \
vcn_v3_0.o \
vcn_v4_0.o \
vcn_v4_0_3.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
jpeg_v2_5.o \
jpeg_v3_0.o \
jpeg_v4_0.o
jpeg_v4_0.o \
jpeg_v4_0_3.o

# add ATHUB block
amdgpu-y += \

@@ -203,6 +214,7 @@ amdgpu-y += \
smuio_v11_0.o \
smuio_v11_0_6.o \
smuio_v13_0.o \
smuio_v13_0_3.o \
smuio_v13_0_6.o

# add reset block

@@ -228,6 +240,7 @@ amdgpu-y += \
amdgpu_amdkfd_gfx_v9.o \
amdgpu_amdkfd_arcturus.o \
amdgpu_amdkfd_aldebaran.o \
amdgpu_amdkfd_gc_9_4_3.o \
amdgpu_amdkfd_gfx_v10.o \
amdgpu_amdkfd_gfx_v10_3.o \
amdgpu_amdkfd_gfx_v11.o

@@ -107,8 +107,9 @@
#include "amdgpu_fdinfo.h"
#include "amdgpu_mca.h"
#include "amdgpu_ras.h"
#include "amdgpu_xcp.h"

#define MAX_GPU_INSTANCE 16
#define MAX_GPU_INSTANCE 64

struct amdgpu_gpu_instance
{

@@ -212,6 +213,8 @@ extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
extern int amdgpu_smartshift_bias;
extern int amdgpu_use_xgmi_p2p;
extern int amdgpu_mtype_local;
extern bool enforce_isolation;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
extern bool debug_evictions;

@@ -242,9 +245,10 @@ extern int amdgpu_num_kcq;
extern int amdgpu_vcnfw_log;
extern int amdgpu_sg_display;

extern int amdgpu_user_partt_mode;

#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
#define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2)

@@ -282,6 +286,7 @@ extern int amdgpu_sg_display;
#define AMDGPU_SMARTSHIFT_MAX_BIAS (100)
#define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)

struct amdgpu_xcp_mgr;
struct amdgpu_device;
struct amdgpu_irq_src;
struct amdgpu_fpriv;

@@ -463,6 +468,8 @@ struct amdgpu_fpriv {
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
/** GPU partition selection */
uint32_t xcp_id;
};

int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);

@@ -573,6 +580,8 @@ struct amdgpu_asic_funcs {
/* query video codecs */
int (*query_video_codecs)(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs);
/* encode "> 32bits" smn addressing */
u64 (*encode_ext_smn_addressing)(int ext_id);
};

/*

@@ -607,6 +616,9 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);

typedef uint32_t (*amdgpu_rreg_ext_t)(struct amdgpu_device*, uint64_t);
typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t);

typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);

@@ -657,7 +669,7 @@ enum amd_hw_ip_block_type {
MAX_HWIP
};

#define HWIP_MAX_INSTANCE 28
#define HWIP_MAX_INSTANCE 44

#define HW_ID_MAX 300
#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))

@@ -665,6 +677,17 @@ enum amd_hw_ip_block_type {
#define IP_VERSION_MIN(ver) (((ver) >> 8) & 0xFF)
#define IP_VERSION_REV(ver) ((ver) & 0xFF)

struct amdgpu_ip_map_info {
/* Map of logical to actual dev instances/mask */
uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE];
int8_t (*logical_to_dev_inst)(struct amdgpu_device *adev,
enum amd_hw_ip_block_type block,
int8_t inst);
uint32_t (*logical_to_dev_mask)(struct amdgpu_device *adev,
enum amd_hw_ip_block_type block,
uint32_t mask);
};

struct amd_powerplay {
void *pp_handle;
const struct amd_pm_funcs *pp_funcs;

@@ -750,6 +773,7 @@ struct amdgpu_device {
struct amdgpu_acp acp;
#endif
struct amdgpu_hive_info *hive;
struct amdgpu_xcp_mgr *xcp_mgr;
/* ASIC */
enum amd_asic_type asic_type;
uint32_t family;

@@ -797,6 +821,8 @@ struct amdgpu_device {
amdgpu_wreg_t pcie_wreg;
amdgpu_rreg_t pciep_rreg;
amdgpu_wreg_t pciep_wreg;
amdgpu_rreg_ext_t pcie_rreg_ext;
amdgpu_wreg_ext_t pcie_wreg_ext;
amdgpu_rreg64_t pcie_rreg64;
amdgpu_wreg64_t pcie_wreg64;
/* protects concurrent UVD register access */

@@ -830,7 +856,7 @@ struct amdgpu_device {
dma_addr_t dummy_page_addr;
struct amdgpu_vm_manager vm_manager;
struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
unsigned num_vmhubs;
DECLARE_BITMAP(vmhubs_mask, AMDGPU_MAX_VMHUBS);

/* memory management */
struct amdgpu_mman mman;

@@ -962,6 +988,7 @@ struct amdgpu_device {

/* soc15 register offset based on ip, instance and segment */
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
struct amdgpu_ip_map_info ip_map;

/* delayed work_func for deferring clockgating during resume */
struct delayed_work delayed_init_work;

@@ -1020,6 +1047,9 @@ struct amdgpu_device {
struct pci_saved_state *pci_state;
pci_channel_state_t pci_channel_state;

/* Track auto wait count on s_barrier settings */
bool barrier_has_auto_waitcnt;

struct amdgpu_reset_control *reset_cntl;
uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];

@@ -1050,6 +1080,8 @@ struct amdgpu_device {

bool job_hang;
bool dc_enabled;
/* Mask of active clusters */
uint32_t aid_mask;
};

static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)

@@ -1081,11 +1113,18 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,

void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
void *buf, size_t size, bool write);
uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
uint32_t inst, uint32_t reg_addr, char reg_name[],
uint32_t expected_value, uint32_t mask);
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr);
void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
u64 reg_addr, u32 reg_data);
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
uint32_t reg, uint32_t v);
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);

@@ -1137,6 +1176,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
#define RREG32_PCIE_EXT(reg) adev->pcie_rreg_ext(adev, (reg))
#define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v))
#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))

@@ -1204,7 +1245,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
/*
* ASICs macro.
*/
#define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
#define amdgpu_asic_set_vga_state(adev, state) \
((adev)->asic_funcs->set_vga_state ? (adev)->asic_funcs->set_vga_state((adev), (state)) : 0)
#define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev))
#define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))

@@ -1235,6 +1277,10 @@ int emu_soc_asic_init(struct amdgpu_device *adev);

#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));

#define for_each_inst(i, inst_mask) \
for (i = ffs(inst_mask) - 1; inst_mask; \
inst_mask &= ~(1U << i), i = ffs(inst_mask) - 1)

#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))

/* Common functions */

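The for_each_inst() helper added above iterates over the set bits of an instance mask and clears the mask as it goes, so callers should pass a scratch copy. A minimal usage sketch (the mask value and message are illustrative, not from the patch):

/* Sketch: walk instances 0 and 2 using the for_each_inst() macro above.
 * The macro consumes inst_mask, so keep the original mask elsewhere.
 */
uint32_t inst_mask = 0x5;
int i;

for_each_inst(i, inst_mask)
	pr_info("handling instance %d\n", i);
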
@@ -1348,6 +1394,12 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock);

/* amdgpu_acpi.c */

struct amdgpu_numa_info {
uint64_t size;
int pxm;
int nid;
};

/* ATCS Device/Driver State */
#define AMDGPU_ATCS_PSC_DEV_STATE_D0 0
#define AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT 3

@@ -1365,15 +1417,32 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
u8 dev_state, bool drv_state);
int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state);
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
u64 *tmr_size);
int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
struct amdgpu_numa_info *numa_info);

void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
void amdgpu_acpi_detect(void);
void amdgpu_acpi_release(void);
#else
static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
static inline int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev,
u64 *tmr_offset, u64 *tmr_size)
{
return -EINVAL;
}
static inline int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev,
int xcc_id,
struct amdgpu_numa_info *numa_info)
{
return -EINVAL;
}
static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
static inline void amdgpu_acpi_detect(void) { }
static inline void amdgpu_acpi_release(void) { }
static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
u8 dev_state, bool drv_state) { return 0; }

@ -26,6 +26,7 @@
|
|||
#include <linux/acpi.h>
|
||||
#include <linux/backlight.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/xarray.h>
|
||||
#include <linux/power_supply.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/suspend.h>
|
||||
|
@ -38,6 +39,45 @@
|
|||
#include "amd_acpi.h"
|
||||
#include "atom.h"
|
||||
|
||||
/* Declare GUID for AMD _DSM method for XCCs */
|
||||
static const guid_t amd_xcc_dsm_guid = GUID_INIT(0x8267f5d5, 0xa556, 0x44f2,
|
||||
0xb8, 0xb4, 0x45, 0x56, 0x2e,
|
||||
0x8c, 0x5b, 0xec);
|
||||
|
||||
#define AMD_XCC_HID_START 3000
|
||||
#define AMD_XCC_DSM_GET_NUM_FUNCS 0
|
||||
#define AMD_XCC_DSM_GET_SUPP_MODE 1
|
||||
#define AMD_XCC_DSM_GET_XCP_MODE 2
|
||||
#define AMD_XCC_DSM_GET_VF_XCC_MAPPING 4
|
||||
#define AMD_XCC_DSM_GET_TMR_INFO 5
|
||||
#define AMD_XCC_DSM_NUM_FUNCS 5
|
||||
|
||||
#define AMD_XCC_MAX_HID 24
|
||||
|
||||
struct xarray numa_info_xa;
|
||||
|
||||
/* Encapsulates the XCD acpi object information */
|
||||
struct amdgpu_acpi_xcc_info {
|
||||
struct list_head list;
|
||||
struct amdgpu_numa_info *numa_info;
|
||||
uint8_t xcp_node;
|
||||
uint8_t phy_id;
|
||||
acpi_handle handle;
|
||||
};
|
||||
|
||||
struct amdgpu_acpi_dev_info {
|
||||
struct list_head list;
|
||||
struct list_head xcc_list;
|
||||
uint16_t bdf;
|
||||
uint16_t supp_xcp_mode;
|
||||
uint16_t xcp_mode;
|
||||
uint16_t mem_mode;
|
||||
uint64_t tmr_base;
|
||||
uint64_t tmr_size;
|
||||
};
|
||||
|
||||
struct list_head amdgpu_acpi_dev_list;
|
||||
|
||||
struct amdgpu_atif_notification_cfg {
|
||||
bool enabled;
|
||||
int command_code;
|
||||
|
@ -801,6 +841,343 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta
|
|||
return r;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ACPI_NUMA
|
||||
static inline uint64_t amdgpu_acpi_get_numa_size(int nid)
|
||||
{
|
||||
/* This is directly using si_meminfo_node implementation as the
|
||||
* function is not exported.
|
||||
*/
|
||||
int zone_type;
|
||||
uint64_t managed_pages = 0;
|
||||
|
||||
pg_data_t *pgdat = NODE_DATA(nid);
|
||||
|
||||
for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
|
||||
managed_pages +=
|
||||
zone_managed_pages(&pgdat->node_zones[zone_type]);
|
||||
return managed_pages * PAGE_SIZE;
|
||||
}
|
||||
|
||||
static struct amdgpu_numa_info *amdgpu_acpi_get_numa_info(uint32_t pxm)
|
||||
{
|
||||
struct amdgpu_numa_info *numa_info;
|
||||
int nid;
|
||||
|
||||
numa_info = xa_load(&numa_info_xa, pxm);
|
||||
|
||||
if (!numa_info) {
|
||||
struct sysinfo info;
|
||||
|
||||
numa_info = kzalloc(sizeof *numa_info, GFP_KERNEL);
|
||||
if (!numa_info)
|
||||
return NULL;
|
||||
|
||||
nid = pxm_to_node(pxm);
|
||||
numa_info->pxm = pxm;
|
||||
numa_info->nid = nid;
|
||||
|
||||
if (numa_info->nid == NUMA_NO_NODE) {
|
||||
si_meminfo(&info);
|
||||
numa_info->size = info.totalram * info.mem_unit;
|
||||
} else {
|
||||
numa_info->size = amdgpu_acpi_get_numa_size(nid);
|
||||
}
|
||||
xa_store(&numa_info_xa, numa_info->pxm, numa_info, GFP_KERNEL);
|
||||
}
|
||||
|
||||
return numa_info;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu
|
||||
* acpi device handle
|
||||
*
|
||||
* @handle: acpi handle
|
||||
* @numa_info: amdgpu_numa_info structure holding numa information
|
||||
*
|
||||
* Queries the ACPI interface to fetch the corresponding NUMA Node ID for a
|
||||
* given amdgpu acpi device.
|
||||
*
|
||||
* Returns ACPI STATUS OK with Node ID on success or the corresponding failure reason
|
||||
*/
|
||||
static acpi_status amdgpu_acpi_get_node_id(acpi_handle handle,
|
||||
struct amdgpu_numa_info **numa_info)
|
||||
{
|
||||
#ifdef CONFIG_ACPI_NUMA
|
||||
u64 pxm;
|
||||
acpi_status status;
|
||||
|
||||
if (!numa_info)
|
||||
return_ACPI_STATUS(AE_ERROR);
|
||||
|
||||
status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
|
||||
|
||||
if (ACPI_FAILURE(status))
|
||||
return status;
|
||||
|
||||
*numa_info = amdgpu_acpi_get_numa_info(pxm);
|
||||
|
||||
if (!*numa_info)
|
||||
return_ACPI_STATUS(AE_ERROR);
|
||||
|
||||
return_ACPI_STATUS(AE_OK);
|
||||
#else
|
||||
return_ACPI_STATUS(AE_NOT_EXIST);
|
||||
#endif
|
||||
}
|
||||
|
||||
static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf)
|
||||
{
|
||||
struct amdgpu_acpi_dev_info *acpi_dev;
|
||||
|
||||
if (list_empty(&amdgpu_acpi_dev_list))
|
||||
return NULL;
|
||||
|
||||
list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list)
|
||||
if (acpi_dev->bdf == bdf)
|
||||
return acpi_dev;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
|
||||
struct amdgpu_acpi_xcc_info *xcc_info, u16 bdf)
|
||||
{
|
||||
struct amdgpu_acpi_dev_info *tmp;
|
||||
union acpi_object *obj;
|
||||
int ret = -ENOENT;
|
||||
|
||||
*dev_info = NULL;
|
||||
tmp = kzalloc(sizeof(struct amdgpu_acpi_dev_info), GFP_KERNEL);
|
||||
if (!tmp)
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(&tmp->xcc_list);
|
||||
INIT_LIST_HEAD(&tmp->list);
|
||||
tmp->bdf = bdf;
|
||||
|
||||
obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
|
||||
AMD_XCC_DSM_GET_SUPP_MODE, NULL,
|
||||
ACPI_TYPE_INTEGER);
|
||||
|
||||
if (!obj) {
|
||||
acpi_handle_debug(xcc_info->handle,
|
||||
"_DSM function %d evaluation failed",
|
||||
AMD_XCC_DSM_GET_SUPP_MODE);
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
tmp->supp_xcp_mode = obj->integer.value & 0xFFFF;
|
||||
ACPI_FREE(obj);
|
||||
|
||||
obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
|
||||
AMD_XCC_DSM_GET_XCP_MODE, NULL,
|
||||
ACPI_TYPE_INTEGER);
|
||||
|
||||
if (!obj) {
|
||||
acpi_handle_debug(xcc_info->handle,
|
||||
"_DSM function %d evaluation failed",
|
||||
AMD_XCC_DSM_GET_XCP_MODE);
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
tmp->xcp_mode = obj->integer.value & 0xFFFF;
|
||||
tmp->mem_mode = (obj->integer.value >> 32) & 0xFFFF;
|
||||
ACPI_FREE(obj);
|
||||
|
||||
/* Evaluate DSMs and fill XCC information */
|
||||
obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
|
||||
AMD_XCC_DSM_GET_TMR_INFO, NULL,
|
||||
ACPI_TYPE_PACKAGE);
|
||||
|
||||
if (!obj || obj->package.count < 2) {
|
||||
acpi_handle_debug(xcc_info->handle,
|
||||
"_DSM function %d evaluation failed",
|
||||
AMD_XCC_DSM_GET_TMR_INFO);
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
tmp->tmr_base = obj->package.elements[0].integer.value;
|
||||
tmp->tmr_size = obj->package.elements[1].integer.value;
|
||||
ACPI_FREE(obj);
|
||||
|
||||
DRM_DEBUG_DRIVER(
|
||||
"New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ",
|
||||
tmp->bdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
|
||||
tmp->tmr_base, tmp->tmr_size);
|
||||
list_add_tail(&tmp->list, &amdgpu_acpi_dev_list);
|
||||
*dev_info = tmp;
|
||||
|
||||
return 0;
|
||||
|
||||
out:
|
||||
if (obj)
|
||||
ACPI_FREE(obj);
|
||||
kfree(tmp);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info,
|
||||
u16 *bdf)
|
||||
{
|
||||
union acpi_object *obj;
|
||||
acpi_status status;
|
||||
int ret = -ENOENT;
|
||||
|
||||
obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
|
||||
AMD_XCC_DSM_GET_NUM_FUNCS, NULL,
|
||||
ACPI_TYPE_INTEGER);
|
||||
|
||||
if (!obj || obj->integer.value != AMD_XCC_DSM_NUM_FUNCS)
|
||||
goto out;
|
||||
ACPI_FREE(obj);
|
||||
|
||||
/* Evaluate DSMs and fill XCC information */
|
||||
obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
|
||||
AMD_XCC_DSM_GET_VF_XCC_MAPPING, NULL,
|
||||
ACPI_TYPE_INTEGER);
|
||||
|
||||
if (!obj) {
|
||||
acpi_handle_debug(xcc_info->handle,
|
||||
"_DSM function %d evaluation failed",
|
||||
AMD_XCC_DSM_GET_VF_XCC_MAPPING);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* PF xcc id [39:32] */
|
||||
xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF;
|
||||
/* xcp node of this xcc [47:40] */
|
||||
xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF;
|
||||
/* PF bus/dev/fn of this xcc [63:48] */
|
||||
*bdf = (obj->integer.value >> 48) & 0xFFFF;
|
||||
ACPI_FREE(obj);
|
||||
obj = NULL;
|
||||
|
||||
status =
|
||||
amdgpu_acpi_get_node_id(xcc_info->handle, &xcc_info->numa_info);
|
||||
|
||||
/* TODO: check if this check is required */
|
||||
if (ACPI_SUCCESS(status))
|
||||
ret = 0;
|
||||
out:
|
||||
if (obj)
|
||||
ACPI_FREE(obj);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int amdgpu_acpi_enumerate_xcc(void)
|
||||
{
|
||||
struct amdgpu_acpi_dev_info *dev_info = NULL;
|
||||
struct amdgpu_acpi_xcc_info *xcc_info;
|
||||
struct acpi_device *acpi_dev;
|
||||
char hid[ACPI_ID_LEN];
|
||||
int ret, id;
|
||||
u16 bdf;
|
||||
|
||||
INIT_LIST_HEAD(&amdgpu_acpi_dev_list);
|
||||
xa_init(&numa_info_xa);
|
||||
|
||||
for (id = 0; id < AMD_XCC_MAX_HID; id++) {
|
||||
sprintf(hid, "%s%d", "AMD", AMD_XCC_HID_START + id);
|
||||
acpi_dev = acpi_dev_get_first_match_dev(hid, NULL, -1);
|
||||
/* These ACPI objects are expected to be in sequential order. If
|
||||
* one is not found, no need to check the rest.
|
||||
*/
|
||||
if (!acpi_dev) {
|
||||
DRM_DEBUG_DRIVER("No matching acpi device found for %s",
|
||||
hid);
|
||||
break;
|
||||
}
|
||||
|
||||
xcc_info = kzalloc(sizeof(struct amdgpu_acpi_xcc_info),
|
||||
GFP_KERNEL);
|
||||
if (!xcc_info) {
|
||||
DRM_ERROR("Failed to allocate memory for xcc info\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&xcc_info->list);
|
||||
xcc_info->handle = acpi_device_handle(acpi_dev);
|
||||
acpi_dev_put(acpi_dev);
|
||||
|
||||
ret = amdgpu_acpi_get_xcc_info(xcc_info, &bdf);
|
||||
if (ret) {
|
||||
kfree(xcc_info);
|
||||
continue;
|
||||
}
|
||||
|
||||
dev_info = amdgpu_acpi_get_dev(bdf);
|
||||
|
||||
if (!dev_info)
|
||||
ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, bdf);
|
||||
|
||||
if (ret == -ENOMEM)
|
||||
return ret;
|
||||
|
||||
if (!dev_info) {
|
||||
kfree(xcc_info);
|
||||
continue;
|
||||
}
|
||||
|
||||
list_add_tail(&xcc_info->list, &dev_info->xcc_list);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
|
||||
u64 *tmr_size)
|
||||
{
|
||||
struct amdgpu_acpi_dev_info *dev_info;
|
||||
u16 bdf;
|
||||
|
||||
if (!tmr_offset || !tmr_size)
|
||||
return -EINVAL;
|
||||
|
||||
bdf = (adev->pdev->bus->number << 8) | adev->pdev->devfn;
|
||||
dev_info = amdgpu_acpi_get_dev(bdf);
|
||||
if (!dev_info)
|
||||
return -ENOENT;
|
||||
|
||||
*tmr_offset = dev_info->tmr_base;
|
||||
*tmr_size = dev_info->tmr_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
|
||||
struct amdgpu_numa_info *numa_info)
|
||||
{
|
||||
struct amdgpu_acpi_dev_info *dev_info;
|
||||
struct amdgpu_acpi_xcc_info *xcc_info;
|
||||
u16 bdf;
|
||||
|
||||
if (!numa_info)
|
||||
return -EINVAL;
|
||||
|
||||
bdf = (adev->pdev->bus->number << 8) | adev->pdev->devfn;
|
||||
dev_info = amdgpu_acpi_get_dev(bdf);
|
||||
if (!dev_info)
|
||||
return -ENOENT;
|
||||
|
||||
list_for_each_entry(xcc_info, &dev_info->xcc_list, list) {
|
||||
if (xcc_info->phy_id == xcc_id) {
|
||||
memcpy(numa_info, xcc_info->numa_info,
|
||||
sizeof(*numa_info));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_acpi_event - handle notify events
|
||||
*
|
||||
|
@ -1054,6 +1431,36 @@ void amdgpu_acpi_detect(void)
|
|||
} else {
|
||||
atif->backlight_caps.caps_valid = false;
|
||||
}
|
||||
|
||||
amdgpu_acpi_enumerate_xcc();
|
||||
}
|
||||
|
||||
void amdgpu_acpi_release(void)
|
||||
{
|
||||
struct amdgpu_acpi_dev_info *dev_info, *dev_tmp;
|
||||
struct amdgpu_acpi_xcc_info *xcc_info, *xcc_tmp;
|
||||
struct amdgpu_numa_info *numa_info;
|
||||
unsigned long index;
|
||||
|
||||
xa_for_each(&numa_info_xa, index, numa_info) {
|
||||
kfree(numa_info);
|
||||
xa_erase(&numa_info_xa, index);
|
||||
}
|
||||
|
||||
if (list_empty(&amdgpu_acpi_dev_list))
|
||||
return;
|
||||
|
||||
list_for_each_entry_safe(dev_info, dev_tmp, &amdgpu_acpi_dev_list,
|
||||
list) {
|
||||
list_for_each_entry_safe(xcc_info, xcc_tmp, &dev_info->xcc_list,
|
||||
list) {
|
||||
list_del(&xcc_info->list);
|
||||
kfree(xcc_info);
|
||||
}
|
||||
|
||||
list_del(&dev_info->list);
|
||||
kfree(dev_info);
|
||||
}
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_SUSPEND)
|
||||
|
@ -1092,16 +1499,20 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
|
|||
* S0ix even though the system is suspending to idle, so return false
|
||||
* in that case.
|
||||
*/
|
||||
if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0))
|
||||
dev_warn_once(adev->dev,
|
||||
if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) {
|
||||
dev_err_once(adev->dev,
|
||||
"Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n"
|
||||
"To use suspend-to-idle change the sleep mode in BIOS setup.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
#if !IS_ENABLED(CONFIG_AMD_PMC)
|
||||
dev_warn_once(adev->dev,
|
||||
dev_err_once(adev->dev,
|
||||
"Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n");
|
||||
#endif /* CONFIG_AMD_PMC */
|
||||
return false;
|
||||
#else
|
||||
return true;
|
||||
#endif /* CONFIG_AMD_PMC */
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SUSPEND */
|
||||
|
|
|
@ -53,7 +53,6 @@ int amdgpu_amdkfd_init(void)
|
|||
amdgpu_amdkfd_total_mem_size *= si.mem_unit;
|
||||
|
||||
ret = kgd2kfd_init();
|
||||
amdgpu_amdkfd_gpuvm_init_mem_limits();
|
||||
kfd_initialized = !ret;
|
||||
|
||||
return ret;
|
||||
|
@ -143,6 +142,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
|||
int i;
|
||||
int last_valid_bit;
|
||||
|
||||
amdgpu_amdkfd_gpuvm_init_mem_limits();
|
||||
|
||||
if (adev->kfd.dev) {
|
||||
struct kgd2kfd_shared_resources gpu_resources = {
|
||||
.compute_vmid_bitmap =
|
||||
|
@ -162,7 +163,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
|||
* clear
|
||||
*/
|
||||
bitmap_complement(gpu_resources.cp_queue_bitmap,
|
||||
adev->gfx.mec.queue_bitmap,
|
||||
adev->gfx.mec_bitmap[0].queue_bitmap,
|
||||
KGD_MAX_QUEUES);
|
||||
|
||||
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
|
||||
|
@ -427,14 +428,23 @@ uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
|
|||
}
|
||||
|
||||
void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
|
||||
struct kfd_local_mem_info *mem_info)
|
||||
struct kfd_local_mem_info *mem_info,
|
||||
struct amdgpu_xcp *xcp)
|
||||
{
|
||||
memset(mem_info, 0, sizeof(*mem_info));
|
||||
|
||||
mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
|
||||
mem_info->local_mem_size_private = adev->gmc.real_vram_size -
|
||||
if (xcp) {
|
||||
if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size)
|
||||
mem_info->local_mem_size_public =
|
||||
KFD_XCP_MEMORY_SIZE(adev, xcp->id);
|
||||
else
|
||||
mem_info->local_mem_size_private =
|
||||
KFD_XCP_MEMORY_SIZE(adev, xcp->id);
|
||||
} else {
|
||||
mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
|
||||
mem_info->local_mem_size_private = adev->gmc.real_vram_size -
|
||||
adev->gmc.visible_vram_size;
|
||||
|
||||
}
|
||||
mem_info->vram_width = adev->gmc.vram_width;
|
||||
|
||||
pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
|
||||
|
@ -497,7 +507,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
|
|||
struct amdgpu_device **dmabuf_adev,
|
||||
uint64_t *bo_size, void *metadata_buffer,
|
||||
size_t buffer_size, uint32_t *metadata_size,
|
||||
uint32_t *flags)
|
||||
uint32_t *flags, int8_t *xcp_id)
|
||||
{
|
||||
struct dma_buf *dma_buf;
|
||||
struct drm_gem_object *obj;
|
||||
|
@ -541,6 +551,8 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
|
|||
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
|
||||
*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
|
||||
}
|
||||
if (xcp_id)
|
||||
*xcp_id = bo->xcp_id;
|
||||
|
||||
out_put:
|
||||
dma_buf_put(dma_buf);
|
||||
|
@ -732,17 +744,19 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
|
|||
if (adev->family == AMDGPU_FAMILY_AI) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < adev->num_vmhubs; i++)
|
||||
for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
|
||||
} else {
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
|
||||
uint16_t pasid, enum TLB_FLUSH_TYPE flush_type)
|
||||
uint16_t pasid,
|
||||
enum TLB_FLUSH_TYPE flush_type,
|
||||
uint32_t inst)
|
||||
{
|
||||
bool all_hub = false;
|
||||
|
||||
|
@ -750,7 +764,7 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
|
|||
adev->family == AMDGPU_FAMILY_RV)
|
||||
all_hub = true;
|
||||
|
||||
return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
|
||||
return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst);
|
||||
}
|
||||
|
||||
bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
|
||||
|
@ -758,11 +772,32 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
|
|||
return adev->have_atomics_support;
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
|
||||
{
|
||||
amdgpu_device_flush_hdp(adev, NULL);
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
|
||||
{
|
||||
amdgpu_umc_poison_handler(adev, reset);
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
|
||||
uint32_t *payload)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* Device or IH ring is not ready so bail. */
|
||||
ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Send payload to fence KFD interrupts */
|
||||
amdgpu_amdkfd_interrupt(adev, payload);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
|
||||
|
@ -770,3 +805,28 @@ bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
|
|||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
|
||||
{
|
||||
return kgd2kfd_check_and_lock_kfd();
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
|
||||
{
|
||||
kgd2kfd_unlock_kfd();
|
||||
}
|
||||
|
||||
|
||||
u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
|
||||
{
|
||||
u64 tmp;
|
||||
s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
|
||||
|
||||
if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) {
|
||||
tmp = adev->gmc.mem_partitions[mem_id].size;
|
||||
do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
|
||||
return ALIGN_DOWN(tmp, PAGE_SIZE);
|
||||
} else {
|
||||
return adev->gmc.real_vram_size;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,10 +30,12 @@
|
|||
#include <linux/kthread.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/memremap.h>
|
||||
#include <kgd_kfd_interface.h>
|
||||
#include <drm/ttm/ttm_execbuf_util.h>
|
||||
#include "amdgpu_sync.h"
|
||||
#include "amdgpu_vm.h"
|
||||
#include "amdgpu_xcp.h"
|
||||
|
||||
extern uint64_t amdgpu_amdkfd_total_mem_size;
|
||||
|
||||
|
@ -97,10 +99,13 @@ struct amdgpu_amdkfd_fence {
|
|||
|
||||
struct amdgpu_kfd_dev {
|
||||
struct kfd_dev *dev;
|
||||
int64_t vram_used;
|
||||
uint64_t vram_used_aligned;
|
||||
int64_t vram_used[MAX_XCP];
|
||||
uint64_t vram_used_aligned[MAX_XCP];
|
||||
bool init_complete;
|
||||
struct work_struct reset_work;
|
||||
|
||||
/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
|
||||
struct dev_pagemap pgmap;
|
||||
};
|
||||
|
||||
enum kgd_engine_type {
|
||||
|
@ -151,6 +156,8 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
|
|||
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
|
||||
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
|
||||
void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
|
||||
int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev);
|
||||
void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev);
|
||||
int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
|
||||
enum kgd_engine_type engine,
|
||||
uint32_t vmid, uint64_t gpu_addr,
|
||||
|
@ -160,7 +167,8 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
|
|||
int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
|
||||
uint16_t vmid);
|
||||
int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
|
||||
uint16_t pasid, enum TLB_FLUSH_TYPE flush_type);
|
||||
uint16_t pasid, enum TLB_FLUSH_TYPE flush_type,
|
||||
uint32_t inst);
|
||||
|
||||
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
|
||||
|
||||
|
@ -224,7 +232,8 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
|
|||
uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
|
||||
enum kgd_engine_type type);
|
||||
void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
|
||||
struct kfd_local_mem_info *mem_info);
|
||||
struct kfd_local_mem_info *mem_info,
|
||||
struct amdgpu_xcp *xcp);
|
||||
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev);
|
||||
|
||||
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev);
|
||||
|
@ -234,13 +243,15 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
|
|||
struct amdgpu_device **dmabuf_adev,
|
||||
uint64_t *bo_size, void *metadata_buffer,
|
||||
size_t buffer_size, uint32_t *metadata_size,
|
||||
uint32_t *flags);
|
||||
uint32_t *flags, int8_t *xcp_id);
|
||||
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
|
||||
struct amdgpu_device *src);
|
||||
int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
|
||||
struct amdgpu_device *src,
|
||||
bool is_min);
|
||||
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
|
||||
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
|
||||
uint32_t *payload);
|
||||
|
||||
/* Read user wptr from a specified user address space with page fault
|
||||
* disabled. The memory must be pinned and mapped to the hardware when
|
||||
|
@ -279,7 +290,8 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
|
|||
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
|
||||
void *drm_priv);
|
||||
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
|
||||
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
|
||||
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
|
||||
uint8_t xcp_id);
|
||||
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
struct amdgpu_device *adev, uint64_t va, uint64_t size,
|
||||
void *drm_priv, struct kgd_mem **mem,
|
||||
|
@ -310,6 +322,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
|
|||
uint64_t *mmap_offset);
|
||||
int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
|
||||
struct dma_buf **dmabuf);
|
||||
void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
|
||||
int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
|
||||
struct tile_config *config);
|
||||
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
|
||||
|
@ -319,9 +332,18 @@ void amdgpu_amdkfd_block_mmu_notifications(void *p);
|
|||
int amdgpu_amdkfd_criu_resume(void *p);
|
||||
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
|
||||
int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 alloc_flag);
|
||||
uint64_t size, u32 alloc_flag, int8_t xcp_id);
|
||||
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 alloc_flag);
|
||||
uint64_t size, u32 alloc_flag, int8_t xcp_id);
|
||||
|
||||
u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
|
||||
|
||||
#define KFD_XCP_MEM_ID(adev, xcp_id) \
|
||||
((adev)->xcp_mgr && (xcp_id) >= 0 ?\
|
||||
(adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1)
|
||||
|
||||
#define KFD_XCP_MEMORY_SIZE(adev, xcp_id) amdgpu_amdkfd_xcp_memory_size((adev), (xcp_id))
|
||||
|
||||
|
||||
#if IS_ENABLED(CONFIG_HSA_AMD)
|
||||
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
|
||||
|
@ -352,6 +374,17 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
|
|||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
|
||||
int kgd2kfd_init_zone_device(struct amdgpu_device *adev);
|
||||
#else
|
||||
static inline
|
||||
int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* KGD2KFD callbacks */
|
||||
int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
|
||||
int kgd2kfd_resume_mm(struct mm_struct *mm);
|
||||
|
@ -372,6 +405,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd);
|
|||
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
|
||||
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
|
||||
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
|
||||
int kgd2kfd_check_and_lock_kfd(void);
|
||||
void kgd2kfd_unlock_kfd(void);
|
||||
#else
|
||||
static inline int kgd2kfd_init(void)
|
||||
{
|
||||
|
@ -437,5 +472,14 @@ static inline
|
|||
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int kgd2kfd_check_and_lock_kfd(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void kgd2kfd_unlock_kfd(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
|
||||
|
|
|
@ -23,6 +23,149 @@
|
|||
#include "amdgpu_amdkfd.h"
|
||||
#include "amdgpu_amdkfd_arcturus.h"
|
||||
#include "amdgpu_amdkfd_gfx_v9.h"
|
||||
#include "gc/gc_9_4_2_offset.h"
|
||||
#include "gc/gc_9_4_2_sh_mask.h"
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
|
||||
/*
|
||||
* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
|
||||
*
|
||||
* restore_dbg_registers is ignored here but is a general interface requirement
|
||||
* for devices that support GFXOFF and where the RLC save/restore list
|
||||
* does not support hw registers for debugging i.e. the driver has to manually
|
||||
* initialize the debug mode registers after it has disabled GFX off during the
|
||||
* debug session.
|
||||
*/
|
||||
static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
|
||||
bool restore_dbg_registers,
|
||||
uint32_t vmid)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
|
||||
static uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev,
|
||||
bool keep_trap_enabled,
|
||||
uint32_t vmid)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static int kgd_aldebaran_validate_trap_override_request(struct amdgpu_device *adev,
|
||||
uint32_t trap_override,
|
||||
uint32_t *trap_mask_supported)
|
||||
{
|
||||
*trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
|
||||
KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
|
||||
KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
|
||||
KFD_DBG_TRAP_MASK_FP_OVERFLOW |
|
||||
KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
|
||||
KFD_DBG_TRAP_MASK_FP_INEXACT |
|
||||
KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
|
||||
KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
|
||||
KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
|
||||
|
||||
if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
|
||||
trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
|
||||
return -EPERM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* returns TRAP_EN, EXCP_EN and EXCP_RPLACE. */
|
||||
static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device *adev,
|
||||
uint32_t vmid,
|
||||
uint32_t trap_override,
|
||||
uint32_t trap_mask_bits,
|
||||
uint32_t trap_mask_request,
|
||||
uint32_t *trap_mask_prev,
|
||||
uint32_t kfd_dbg_trap_cntl_prev)
|
||||
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
*trap_mask_prev = REG_GET_FIELD(kfd_dbg_trap_cntl_prev, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
|
||||
trap_mask_bits = (trap_mask_bits & trap_mask_request) |
|
||||
(*trap_mask_prev & ~trap_mask_request);
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, trap_mask_bits);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
|
||||
uint8_t wave_launch_mode,
|
||||
uint32_t vmid)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
|
||||
static uint32_t kgd_gfx_aldebaran_set_address_watch(
|
||||
struct amdgpu_device *adev,
|
||||
uint64_t watch_address,
|
||||
uint32_t watch_address_mask,
|
||||
uint32_t watch_id,
|
||||
uint32_t watch_mode,
|
||||
uint32_t debug_vmid)
|
||||
{
|
||||
uint32_t watch_address_high;
|
||||
uint32_t watch_address_low;
|
||||
uint32_t watch_address_cntl;
|
||||
|
||||
watch_address_cntl = 0;
|
||||
watch_address_low = lower_32_bits(watch_address);
|
||||
watch_address_high = upper_32_bits(watch_address) & 0xffff;
|
||||
|
||||
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
|
||||
TCP_WATCH0_CNTL,
|
||||
MODE,
|
||||
watch_mode);
|
||||
|
||||
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
|
||||
TCP_WATCH0_CNTL,
|
||||
MASK,
|
||||
watch_address_mask >> 6);
|
||||
|
||||
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
|
||||
TCP_WATCH0_CNTL,
|
||||
VALID,
|
||||
1);
|
||||
|
||||
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
|
||||
(watch_id * TCP_WATCH_STRIDE)),
|
||||
watch_address_high);
|
||||
|
||||
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
|
||||
(watch_id * TCP_WATCH_STRIDE)),
|
||||
watch_address_low);
|
||||
|
||||
return watch_address_cntl;
|
||||
}
|
||||
|
||||
static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct amdgpu_device *adev,
|
||||
uint32_t watch_id)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct kfd2kgd_calls aldebaran_kfd2kgd = {
|
||||
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
|
||||
|
@ -42,5 +185,14 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
|
|||
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
|
||||
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
|
||||
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
|
||||
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
|
||||
.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
|
||||
.disable_debug_trap = kgd_aldebaran_disable_debug_trap,
|
||||
.validate_trap_override_request = kgd_aldebaran_validate_trap_override_request,
|
||||
.set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override,
|
||||
.set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
|
||||
.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
|
||||
.clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
|
||||
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
|
||||
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
|
||||
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
|
||||
};
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "amdgpu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "amdgpu_amdkfd_arcturus.h"
|
||||
#include "amdgpu_reset.h"
|
||||
#include "sdma0/sdma0_4_2_2_offset.h"
|
||||
#include "sdma0/sdma0_4_2_2_sh_mask.h"
|
||||
#include "sdma1/sdma1_4_2_2_offset.h"
|
||||
|
@ -48,6 +49,8 @@
|
|||
#include "amdgpu_amdkfd_gfx_v9.h"
|
||||
#include "gfxhub_v1_0.h"
|
||||
#include "mmhub_v9_4.h"
|
||||
#include "gc/gc_9_0_offset.h"
|
||||
#include "gc/gc_9_0_sh_mask.h"
|
||||
|
||||
#define HQD_N_REGS 56
|
||||
#define DUMP_REG(addr) do { \
|
||||
|
@ -276,6 +279,117 @@ int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper used to suspend/resume gfx pipe for image post process work to set
|
||||
* barrier behaviour.
|
||||
*/
|
||||
static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool suspend)
|
||||
{
|
||||
int i, r = 0;
|
||||
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
|
||||
|
||||
if (!(ring && ring->sched.thread))
|
||||
continue;
|
||||
|
||||
/* stop secheduler and drain ring. */
|
||||
if (suspend) {
|
||||
drm_sched_stop(&ring->sched, NULL);
|
||||
r = amdgpu_fence_wait_empty(ring);
|
||||
if (r)
|
||||
goto out;
|
||||
} else {
|
||||
drm_sched_start(&ring->sched, false);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
/* return on resume or failure to drain rings. */
|
||||
if (!suspend || r)
|
||||
return r;
|
||||
|
||||
return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
|
||||
}
|
||||
|
||||
static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_waitcnt)
|
||||
{
|
||||
uint32_t data;
|
||||
|
||||
WRITE_ONCE(adev->barrier_has_auto_waitcnt, enable_waitcnt);
|
||||
|
||||
if (!down_read_trylock(&adev->reset_domain->sem))
|
||||
return;
|
||||
|
||||
amdgpu_amdkfd_suspend(adev, false);
|
||||
|
||||
if (suspend_resume_compute_scheduler(adev, true))
|
||||
goto out;
|
||||
|
||||
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG));
|
||||
data = REG_SET_FIELD(data, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
|
||||
!enable_waitcnt);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG), data);
|
||||
|
||||
out:
|
||||
suspend_resume_compute_scheduler(adev, false);
|
||||
|
||||
amdgpu_amdkfd_resume(adev, false);
|
||||
|
||||
up_read(&adev->reset_domain->sem);
|
||||
}
|
||||
|
||||
/*
|
||||
* restore_dbg_registers is ignored here but is a general interface requirement
|
||||
* for devices that support GFXOFF and where the RLC save/restore list
|
||||
* does not support hw registers for debugging i.e. the driver has to manually
|
||||
* initialize the debug mode registers after it has disabled GFX off during the
|
||||
* debug session.
|
||||
*/
|
||||
static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
|
||||
bool restore_dbg_registers,
|
||||
uint32_t vmid)
|
||||
{
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
|
||||
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
|
||||
|
||||
set_barrier_auto_waitcnt(adev, true);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
|
||||
|
||||
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
|
||||
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* keep_trap_enabled is ignored here but is a general interface requirement
|
||||
* for devices that support multi-process debugging where the performance
|
||||
* overhead from trap temporary setup needs to be bypassed when the debug
|
||||
* session has ended.
|
||||
*/
|
||||
static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
|
||||
bool keep_trap_enabled,
|
||||
uint32_t vmid)
|
||||
{
|
||||
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
|
||||
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
|
||||
|
||||
set_barrier_auto_waitcnt(adev, false);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
|
||||
|
||||
kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
|
||||
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
const struct kfd2kgd_calls arcturus_kfd2kgd = {
|
||||
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
|
||||
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
|
||||
|
@ -294,6 +408,15 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
|
|||
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
|
||||
.set_vm_context_page_table_base =
|
||||
kgd_gfx_v9_set_vm_context_page_table_base,
|
||||
.enable_debug_trap = kgd_arcturus_enable_debug_trap,
|
||||
.disable_debug_trap = kgd_arcturus_disable_debug_trap,
|
||||
.validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
|
||||
.set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
|
||||
.set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
|
||||
.set_address_watch = kgd_gfx_v9_set_address_watch,
|
||||
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
|
||||
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
|
||||
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
|
||||
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
|
||||
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
|
||||
};
|
||||
|
|
|
@@ -0,0 +1,384 @@
/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gc/gc_9_4_3_offset.h"
#include "gc/gc_9_4_3_sh_mask.h"
#include "athub/athub_1_8_0_offset.h"
#include "athub/athub_1_8_0_sh_mask.h"
#include "oss/osssys_4_4_2_offset.h"
#include "oss/osssys_4_4_2_sh_mask.h"
#include "v9_structs.h"
#include "soc15.h"
#include "sdma/sdma_4_4_2_offset.h"
#include "sdma/sdma_4_4_2_sh_mask.h"

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}

static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base =
		SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, engine_id),
				 regSDMA_RLC0_RB_CNTL) -
		regSDMA_RLC0_RB_CNTL;
	uint32_t retval = sdma_engine_reg_base +
		queue_id * (regSDMA_RLC1_RB_CNTL - regSDMA_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
		 queue_id, retval);
	return retval;
}
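Aside: the per-queue offset above is plain base-plus-stride arithmetic; the following stand-alone sketch shows the same pattern with made-up placeholder bases and strides, purely for illustration and not part of this patch.

/* Illustration only: how a per-queue SDMA RLC register offset is derived.
 * The numeric base and stride here are placeholders, not real GC 9.4.3
 * register offsets.
 */
#include <stdint.h>
#include <stdio.h>

#define FAKE_SDMA_ENGINE_BASE(e)	(0x1000u + (e) * 0x800u)	/* assumed */
#define FAKE_RLC_QUEUE_STRIDE		0x80u				/* assumed */

static uint32_t fake_sdma_rlc_reg_offset(unsigned int engine_id,
					 unsigned int queue_id)
{
	return FAKE_SDMA_ENGINE_BASE(engine_id) +
	       queue_id * FAKE_RLC_QUEUE_STRIDE;
}

int main(void)
{
	printf("SDMA1 RLC2 offset: 0x%x\n", fake_sdma_rlc_reg_offset(1, 2));
	return 0;
}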

static int kgd_gfx_v9_4_3_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
				uint32_t __user *wptr, struct mm_struct *mm)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
		if (data & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL_OFFSET,
		m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
			lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
			upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
			m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
			m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev,
				uint32_t engine_id, uint32_t queue_id,
				uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
						engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+12)
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = regSDMA_RLC0_RB_CNTL; reg <= regSDMA_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = regSDMA_RLC0_STATUS; reg <= regSDMA_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = regSDMA_RLC0_IB_SUB_REMAIN;
	     reg <= regSDMA_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = regSDMA_RLC0_MIDCMD_DATA0;
	     reg <= regSDMA_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}
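For illustration, a stand-alone sketch of how a caller might walk the (offset, value) pairs an hqd_sdma_dump-style callback returns; the helper name and the sample data are assumptions, not part of this patch.

/* Illustration only: consuming a dump buffer laid out as above, with the
 * byte offset (reg << 2) in [0] and the register value in [1]. The sample
 * pairs are made up.
 */
#include <stdint.h>
#include <stdio.h>

static void print_hqd_dump(uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i;

	for (i = 0; i < n_regs; i++)
		printf("reg 0x%08x = 0x%08x\n", dump[i][0], dump[i][1]);
}

int main(void)
{
	uint32_t sample[2][2] = {
		{ 0x0000a000, 0x00000001 },	/* made-up offset/value pair */
		{ 0x0000a004, 0xdeadbeef },
	};

	print_hqd_dump(sample, 2);
	return 0;
}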

static bool kgd_gfx_v9_4_3_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

static int kgd_gfx_v9_4_3_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
				unsigned int utimeout)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
	temp = temp & ~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
		if (temp & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL) |
		SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr =
		RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI);

	return 0;
}

static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
		u32 pasid, unsigned int vmid, uint32_t xcc_inst)
{
	unsigned long timeout;
	unsigned int reg;
	unsigned int phy_inst = GET_INST(GC, xcc_inst);
	/* Every two XCCs share one AID */
	unsigned int aid = phy_inst / 2;

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
		regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping);

	timeout = jiffies + msecs_to_jiffies(10);
	while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0,
			regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
			(1U << vmid))) {
		if (time_after(jiffies, timeout)) {
			pr_err("Fail to program VMID-PASID mapping\n");
			return -ETIME;
		}
		cpu_relax();
	}

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
		regATC_VMID_PASID_MAPPING_UPDATE_STATUS),
		1U << vmid);

	reg = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX));
	/* Every 4 numbers is a cycle. 1st is AID, 2nd and 3rd are XCDs,
	 * and the 4th is reserved. Therefore "aid * 4 + (xcc_inst % 2) + 1"
	 * programs _LUT for XCC and "aid * 4" for AID where the XCC connects
	 * to.
	 */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
		aid * 4 + (phy_inst % 2) + 1);
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid,
		pasid_mapping);
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
		aid * 4);
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid,
		pasid_mapping);
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), reg);

	return 0;
}
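To make the LUT-indexing comment above concrete, a stand-alone sketch of the same arithmetic with the instance numbers spelled out; illustrative only, not part of this patch.

/* Illustration only: IH_VMID LUT index arithmetic from the comment above.
 * Each AID owns a cycle of four entries: [aid*4] = AID, [aid*4+1] and
 * [aid*4+2] = the two XCCs on that AID, [aid*4+3] = reserved.
 */
#include <stdio.h>

int main(void)
{
	unsigned int phy_inst;

	for (phy_inst = 0; phy_inst < 4; phy_inst++) {
		unsigned int aid = phy_inst / 2;	/* two XCCs per AID */
		unsigned int xcc_lut = aid * 4 + (phy_inst % 2) + 1;
		unsigned int aid_lut = aid * 4;

		printf("XCC %u -> AID %u, XCC LUT index %u, AID LUT index %u\n",
		       phy_inst, aid, xcc_lut, aid_lut);
	}
	return 0;
}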

static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t __user *wptr, uint32_t wptr_shift,
			uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
	struct v9_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, hqd_end, data;

	m = get_mqd(mqd);

	kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);

	/* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_MQD_BASE_ADDR);
	hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI);

	for (reg = hqd_base; reg <= hqd_end; reg++)
		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);


	/* Activate doorbell logic before triggering WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL),
				data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This assumes
		 * that wptr is GPU-accessible in the queue's VMID via
		 * ATC or SVM. WPTR==RPTR before starting the poll so
		 * the CP starts fetching new commands from the right
		 * place.
		 *
		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
		 * tricky. Assume that the queue didn't overflow. The
		 * number of valid bits in the 32-bit RPTR depends on
		 * the queue size. The remaining bits are taken from
		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
		 * queue size.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO),
			   lower_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI),
			   upper_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR),
			   lower_32_bits((uintptr_t)wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
			   regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
			   upper_32_bits((uintptr_t)wptr));
		WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1),
		       (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
			       queue_id));
	}

	/* Start the EOP fetcher */
	WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR),
		   REG_SET_FIELD(m->cp_hqd_eop_rptr,
				 CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data);

	kgd_gfx_v9_release_queue(adev, inst);

	return 0;
}
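As a worked example of the WPTR-reconstruction comment in the function above, a stand-alone sketch with made-up ring values; illustrative only, not part of this patch.

/* Illustration only: reconstructing a 64-bit WPTR from a 32-bit RPTR,
 * mirroring the comment above. All ring values are made-up examples.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t queue_size = 1u << 12;		/* assumed ring size in dwords */
	uint32_t rptr = 0x00000010;		/* low bits from the HQD RPTR */
	uint32_t saved_wptr_lo = 0x00003ff0;	/* saved 64-bit WPTR, low half */
	uint32_t saved_wptr_hi = 0x00000000;

	uint64_t guessed_wptr = rptr & (queue_size - 1);

	/* If the low bits of the saved WPTR trail the RPTR, the ring wrapped
	 * once since the save, so add one full queue size.
	 */
	if ((saved_wptr_lo & (queue_size - 1)) < guessed_wptr)
		guessed_wptr += queue_size;
	guessed_wptr += saved_wptr_lo & ~(uint64_t)(queue_size - 1);
	guessed_wptr += (uint64_t)saved_wptr_hi << 32;

	printf("guessed wptr = 0x%llx\n", (unsigned long long)guessed_wptr);
	return 0;
}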

const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_4_3_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_gfx_v9_4_3_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.get_atc_vmid_pasid_mapping_info =
				kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.set_vm_context_page_table_base =
				kgd_gfx_v9_set_vm_context_page_table_base,
	.program_trap_handler_settings =
				kgd_gfx_v9_program_trap_handler_settings
};

@ -21,6 +21,7 @@
|
|||
*/
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "amdgpu_amdkfd_gfx_v10.h"
|
||||
#include "gc/gc_10_1_0_offset.h"
|
||||
#include "gc/gc_10_1_0_sh_mask.h"
|
||||
#include "athub/athub_2_0_0_offset.h"
|
||||
|
@ -31,6 +32,7 @@
|
|||
#include "v10_structs.h"
|
||||
#include "nv.h"
|
||||
#include "nvd.h"
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
|
||||
enum hqd_dequeue_request_type {
|
||||
NO_ACTION = 0,
|
||||
|
@ -79,7 +81,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
|
|||
uint32_t sh_mem_config,
|
||||
uint32_t sh_mem_ape1_base,
|
||||
uint32_t sh_mem_ape1_limit,
|
||||
uint32_t sh_mem_bases)
|
||||
uint32_t sh_mem_bases, uint32_t inst)
|
||||
{
|
||||
lock_srbm(adev, 0, 0, 0, vmid);
|
||||
|
||||
|
@ -91,7 +93,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
|
|||
}
|
||||
|
||||
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
|
||||
unsigned int vmid)
|
||||
unsigned int vmid, uint32_t inst)
|
||||
{
|
||||
/*
|
||||
* We have to assume that there is no outstanding mapping.
|
||||
|
@ -135,7 +137,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
|
|||
* but still works
|
||||
*/
|
||||
|
||||
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
|
||||
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
|
||||
uint32_t inst)
|
||||
{
|
||||
uint32_t mec;
|
||||
uint32_t pipe;
|
||||
|
@ -205,7 +208,7 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
|
|||
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t __user *wptr, uint32_t wptr_shift,
|
||||
uint32_t wptr_mask, struct mm_struct *mm)
|
||||
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
|
||||
{
|
||||
struct v10_compute_mqd *m;
|
||||
uint32_t *mqd_hqd;
|
||||
|
@ -286,9 +289,9 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
|
|||
|
||||
static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t doorbell_off)
|
||||
uint32_t doorbell_off, uint32_t inst)
|
||||
{
|
||||
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
|
||||
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
|
||||
struct v10_compute_mqd *m;
|
||||
uint32_t mec, pipe;
|
||||
int r;
|
||||
|
@ -303,7 +306,7 @@ static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
|
|||
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
|
||||
mec, pipe, queue_id);
|
||||
|
||||
spin_lock(&adev->gfx.kiq.ring_lock);
|
||||
spin_lock(&adev->gfx.kiq[0].ring_lock);
|
||||
r = amdgpu_ring_alloc(kiq_ring, 7);
|
||||
if (r) {
|
||||
pr_err("Failed to alloc KIQ (%d).\n", r);
|
||||
|
@ -330,7 +333,7 @@ static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
|
|||
amdgpu_ring_commit(kiq_ring);
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&adev->gfx.kiq.ring_lock);
|
||||
spin_unlock(&adev->gfx.kiq[0].ring_lock);
|
||||
release_queue(adev);
|
||||
|
||||
return r;
|
||||
|
@ -338,7 +341,7 @@ out_unlock:
|
|||
|
||||
static int kgd_hqd_dump(struct amdgpu_device *adev,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t (**dump)[2], uint32_t *n_regs)
|
||||
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
|
||||
{
|
||||
uint32_t i = 0, reg;
|
||||
#define HQD_N_REGS 56
|
||||
|
@ -469,7 +472,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
|
|||
|
||||
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
|
||||
uint64_t queue_address, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
uint32_t act;
|
||||
bool retval = false;
|
||||
|
@ -510,7 +513,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
|
|||
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
|
||||
enum kfd_preempt_type reset_type,
|
||||
unsigned int utimeout, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
enum hqd_dequeue_request_type type;
|
||||
unsigned long end_jiffies;
|
||||
|
@ -673,7 +676,7 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
|
|||
|
||||
static int kgd_wave_control_execute(struct amdgpu_device *adev,
|
||||
uint32_t gfx_index_val,
|
||||
uint32_t sq_cmd)
|
||||
uint32_t sq_cmd, uint32_t inst)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
|
@ -708,8 +711,295 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev,
|
|||
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
|
||||
}
|
||||
|
||||
/*
|
||||
* GFX10 helper for wave launch stall requirements on debug trap setting.
|
||||
*
|
||||
* vmid:
|
||||
* Target VMID to stall/unstall.
|
||||
*
|
||||
* stall:
|
||||
* 0-unstall wave launch (enable), 1-stall wave launch (disable).
|
||||
* After wavefront launch has been stalled, allocated waves must drain from
|
||||
* SPI in order for debug trap settings to take effect on those waves.
|
||||
* This is roughly a ~3500 clock cycle wait on SPI where a read on
|
||||
* SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
|
||||
* KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
|
||||
*
|
||||
* NOTE: We can afford to clear the entire STALL_VMID field on unstall
|
||||
* because current GFX10 chips cannot support multi-process debugging due to
|
||||
* trap configuration and masking being limited to global scope. Always
|
||||
* assume single process conditions.
|
||||
*
|
||||
*/
|
||||
|
||||
#define KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY 110
|
||||
static void kgd_gfx_v10_set_wave_launch_stall(struct amdgpu_device *adev, uint32_t vmid, bool stall)
|
||||
{
|
||||
uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
|
||||
int i;
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
|
||||
stall ? 1 << vmid : 0);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
|
||||
|
||||
if (!stall)
|
||||
return;
|
||||
|
||||
for (i = 0; i < KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
|
||||
RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
|
||||
}
|
||||
|
||||
uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
|
||||
bool restore_dbg_registers,
|
||||
uint32_t vmid)
|
||||
{
|
||||
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
|
||||
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
|
||||
|
||||
/* assume gfx off is disabled for the debug session if rlc restore not supported. */
|
||||
if (restore_dbg_registers) {
|
||||
uint32_t data = 0;
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
|
||||
VMID_SEL, 1 << vmid);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
|
||||
TRAP_EN, 1);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
|
||||
|
||||
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
|
||||
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
|
||||
|
||||
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
|
||||
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
|
||||
bool keep_trap_enabled,
|
||||
uint32_t vmid)
|
||||
{
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
|
||||
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
|
||||
|
||||
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
|
||||
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
|
||||
uint32_t trap_override,
|
||||
uint32_t *trap_mask_supported)
|
||||
{
|
||||
*trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
|
||||
|
||||
/* The SPI_GDBG_TRAP_MASK register is global and affects all
|
||||
* processes. Only allow OR-ing the address-watch bit, since
|
||||
* this only affects processes under the debugger. Other bits
|
||||
* should stay 0 to avoid the debugger interfering with other
|
||||
* processes.
|
||||
*/
|
||||
if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
|
||||
uint32_t vmid,
|
||||
uint32_t trap_override,
|
||||
uint32_t trap_mask_bits,
|
||||
uint32_t trap_mask_request,
|
||||
uint32_t *trap_mask_prev,
|
||||
uint32_t kfd_dbg_trap_cntl_prev)
|
||||
{
|
||||
uint32_t data, wave_cntl_prev;
|
||||
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
|
||||
wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
|
||||
|
||||
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
|
||||
|
||||
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
|
||||
*trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
|
||||
|
||||
trap_mask_bits = (trap_mask_bits & trap_mask_request) |
|
||||
(*trap_mask_prev & ~trap_mask_request);
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
|
||||
|
||||
/* We need to preserve wave launch mode stall settings. */
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
|
||||
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
|
||||
uint8_t wave_launch_mode,
|
||||
uint32_t vmid)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
bool is_mode_set = !!wave_launch_mode;
|
||||
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
|
||||
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
|
||||
VMID_MASK, is_mode_set ? 1 << vmid : 0);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
|
||||
MODE, is_mode_set ? wave_launch_mode : 0);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
|
||||
|
||||
kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
|
||||
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
|
||||
uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
|
||||
uint64_t watch_address,
|
||||
uint32_t watch_address_mask,
|
||||
uint32_t watch_id,
|
||||
uint32_t watch_mode,
|
||||
uint32_t debug_vmid)
|
||||
{
|
||||
uint32_t watch_address_high;
|
||||
uint32_t watch_address_low;
|
||||
uint32_t watch_address_cntl;
|
||||
|
||||
watch_address_cntl = 0;
|
||||
|
||||
watch_address_low = lower_32_bits(watch_address);
|
||||
watch_address_high = upper_32_bits(watch_address) & 0xffff;
|
||||
|
||||
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
|
||||
TCP_WATCH0_CNTL,
|
||||
VMID,
|
||||
debug_vmid);
|
||||
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
|
||||
TCP_WATCH0_CNTL,
|
||||
MODE,
|
||||
watch_mode);
|
||||
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
|
||||
TCP_WATCH0_CNTL,
|
||||
MASK,
|
||||
watch_address_mask >> 7);
|
||||
|
||||
/* Turning off this watch point until we set all the registers */
|
||||
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
|
||||
TCP_WATCH0_CNTL,
|
||||
VALID,
|
||||
0);
|
||||
|
||||
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
|
||||
(watch_id * TCP_WATCH_STRIDE)),
|
||||
watch_address_cntl);
|
||||
|
||||
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
|
||||
(watch_id * TCP_WATCH_STRIDE)),
|
||||
watch_address_high);
|
||||
|
||||
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
|
||||
(watch_id * TCP_WATCH_STRIDE)),
|
||||
watch_address_low);
|
||||
|
||||
/* Enable the watch point */
|
||||
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
|
||||
TCP_WATCH0_CNTL,
|
||||
VALID,
|
||||
1);
|
||||
|
||||
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
|
||||
(watch_id * TCP_WATCH_STRIDE)),
|
||||
watch_address_cntl);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
|
||||
uint32_t watch_id)
|
||||
{
|
||||
uint32_t watch_address_cntl;
|
||||
|
||||
watch_address_cntl = 0;
|
||||
|
||||
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
|
||||
(watch_id * TCP_WATCH_STRIDE)),
|
||||
watch_address_cntl);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
|
||||
* The values read are:
|
||||
* ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
|
||||
* atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
|
||||
* wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
|
||||
* gws_wait_time -- Wait Count for Global Wave Syncs.
|
||||
* que_sleep_wait_time -- Wait Count for Dequeue Retry.
|
||||
* sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
|
||||
* sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
|
||||
* deq_retry_wait_time -- Wait Count for Global Wave Syncs.
|
||||
*/
|
||||
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
|
||||
uint32_t *wait_times)
|
||||
|
||||
{
|
||||
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
|
||||
}
|
||||
|
||||
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data)
|
||||
{
|
||||
*reg_data = wait_times;
|
||||
|
||||
/*
|
||||
* The CP cannont handle a 0 grace period input and will result in
|
||||
* an infinite grace period being set so set to 1 to prevent this.
|
||||
*/
|
||||
if (grace_period == 0)
|
||||
grace_period = 1;
|
||||
|
||||
*reg_data = REG_SET_FIELD(*reg_data,
|
||||
CP_IQ_WAIT_TIME2,
|
||||
SCH_WAVE,
|
||||
grace_period);
|
||||
|
||||
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
|
||||
}
|
||||
|
||||
static void program_trap_handler_settings(struct amdgpu_device *adev,
|
||||
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
|
||||
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
|
||||
uint32_t inst)
|
||||
{
|
||||
lock_srbm(adev, 0, 0, 0, vmid);
|
||||
|
||||
|
@ -750,5 +1040,14 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
|
|||
.get_atc_vmid_pasid_mapping_info =
|
||||
get_atc_vmid_pasid_mapping_info,
|
||||
.set_vm_context_page_table_base = set_vm_context_page_table_base,
|
||||
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
|
||||
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
|
||||
.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
|
||||
.set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
|
||||
.set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
|
||||
.set_address_watch = kgd_gfx_v10_set_address_watch,
|
||||
.clear_address_watch = kgd_gfx_v10_clear_address_watch,
|
||||
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
|
||||
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
|
||||
.program_trap_handler_settings = program_trap_handler_settings,
|
||||
};
|
||||
|
|
|
@@ -0,0 +1,55 @@
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
				bool restore_dbg_registers,
				uint32_t vmid);
uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
				bool keep_trap_enabled,
				uint32_t vmid);
int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
				uint32_t trap_override,
				uint32_t *trap_mask_supported);
uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
				uint32_t vmid,
				uint32_t trap_override,
				uint32_t trap_mask_bits,
				uint32_t trap_mask_request,
				uint32_t *trap_mask_prev,
				uint32_t kfd_dbg_trap_cntl_prev);
uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
				uint8_t wave_launch_mode,
				uint32_t vmid);
uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
				uint64_t watch_address,
				uint32_t watch_address_mask,
				uint32_t watch_id,
				uint32_t watch_mode,
				uint32_t debug_vmid);
uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
				uint32_t watch_id);
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
				uint32_t wait_times,
				uint32_t grace_period,
				uint32_t *reg_offset,
				uint32_t *reg_data);

@ -22,6 +22,7 @@
|
|||
#include <linux/mmu_context.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "amdgpu_amdkfd_gfx_v10.h"
|
||||
#include "gc/gc_10_3_0_offset.h"
|
||||
#include "gc/gc_10_3_0_sh_mask.h"
|
||||
#include "oss/osssys_5_0_0_offset.h"
|
||||
|
@ -80,7 +81,7 @@ static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t v
|
|||
uint32_t sh_mem_config,
|
||||
uint32_t sh_mem_ape1_base,
|
||||
uint32_t sh_mem_ape1_limit,
|
||||
uint32_t sh_mem_bases)
|
||||
uint32_t sh_mem_bases, uint32_t inst)
|
||||
{
|
||||
lock_srbm(adev, 0, 0, 0, vmid);
|
||||
|
||||
|
@ -93,7 +94,7 @@ static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t v
|
|||
|
||||
/* ATC is defeatured on Sienna_Cichlid */
|
||||
static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid,
|
||||
unsigned int vmid)
|
||||
unsigned int vmid, uint32_t inst)
|
||||
{
|
||||
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
|
||||
|
||||
|
@ -105,7 +106,8 @@ static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id)
|
||||
static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id,
|
||||
uint32_t inst)
|
||||
{
|
||||
uint32_t mec;
|
||||
uint32_t pipe;
|
||||
|
@ -177,7 +179,7 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
|
|||
static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t __user *wptr, uint32_t wptr_shift,
|
||||
uint32_t wptr_mask, struct mm_struct *mm)
|
||||
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
|
||||
{
|
||||
struct v10_compute_mqd *m;
|
||||
uint32_t *mqd_hqd;
|
||||
|
@ -273,9 +275,9 @@ static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
|
|||
|
||||
static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t doorbell_off)
|
||||
uint32_t doorbell_off, uint32_t inst)
|
||||
{
|
||||
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
|
||||
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
|
||||
struct v10_compute_mqd *m;
|
||||
uint32_t mec, pipe;
|
||||
int r;
|
||||
|
@ -290,7 +292,7 @@ static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
|
|||
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
|
||||
mec, pipe, queue_id);
|
||||
|
||||
spin_lock(&adev->gfx.kiq.ring_lock);
|
||||
spin_lock(&adev->gfx.kiq[0].ring_lock);
|
||||
r = amdgpu_ring_alloc(kiq_ring, 7);
|
||||
if (r) {
|
||||
pr_err("Failed to alloc KIQ (%d).\n", r);
|
||||
|
@ -317,7 +319,7 @@ static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
|
|||
amdgpu_ring_commit(kiq_ring);
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&adev->gfx.kiq.ring_lock);
|
||||
spin_unlock(&adev->gfx.kiq[0].ring_lock);
|
||||
release_queue(adev);
|
||||
|
||||
return r;
|
||||
|
@ -325,7 +327,7 @@ out_unlock:
|
|||
|
||||
static int hqd_dump_v10_3(struct amdgpu_device *adev,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t (**dump)[2], uint32_t *n_regs)
|
||||
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
|
||||
{
|
||||
uint32_t i = 0, reg;
|
||||
#define HQD_N_REGS 56
|
||||
|
@ -456,7 +458,7 @@ static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,
|
|||
|
||||
static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev,
|
||||
uint64_t queue_address, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
uint32_t act;
|
||||
bool retval = false;
|
||||
|
@ -498,7 +500,7 @@ static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev,
|
|||
static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
|
||||
enum kfd_preempt_type reset_type,
|
||||
unsigned int utimeout, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
enum hqd_dequeue_request_type type;
|
||||
unsigned long end_jiffies;
|
||||
|
@ -586,7 +588,7 @@ static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
|
|||
|
||||
static int wave_control_execute_v10_3(struct amdgpu_device *adev,
|
||||
uint32_t gfx_index_val,
|
||||
uint32_t sq_cmd)
|
||||
uint32_t sq_cmd, uint32_t inst)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
|
@ -628,7 +630,8 @@ static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
|
|||
}
|
||||
|
||||
static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
|
||||
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
|
||||
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
|
||||
uint32_t inst)
|
||||
{
|
||||
lock_srbm(adev, 0, 0, 0, vmid);
|
||||
|
||||
|
@ -652,142 +655,6 @@ static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
|
|||
unlock_srbm(adev);
|
||||
}
|
||||
|
||||
#if 0
|
||||
uint32_t enable_debug_trap_v10_3(struct amdgpu_device *adev,
|
||||
uint32_t trap_debug_wave_launch_mode,
|
||||
uint32_t vmid)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
uint32_t orig_wave_cntl_value;
|
||||
uint32_t orig_stall_vmid;
|
||||
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
|
||||
orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC,
|
||||
0,
|
||||
mmSPI_GDBG_WAVE_CNTL));
|
||||
orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value,
|
||||
SPI_GDBG_WAVE_CNTL,
|
||||
STALL_VMID);
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
|
||||
|
||||
data = 0;
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid);
|
||||
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t disable_debug_trap_v10_3(struct amdgpu_device *adev)
|
||||
{
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
|
||||
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t set_wave_launch_trap_override_v10_3(struct amdgpu_device *adev,
|
||||
uint32_t trap_override,
|
||||
uint32_t trap_mask)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
|
||||
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
|
||||
|
||||
data = 0;
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
|
||||
EXCP_EN, trap_mask);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
|
||||
REPLACE, trap_override);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
|
||||
|
||||
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
|
||||
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t set_wave_launch_mode_v10_3(struct amdgpu_device *adev,
|
||||
uint8_t wave_launch_mode,
|
||||
uint32_t vmid)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
bool is_stall_mode;
|
||||
bool is_mode_set;
|
||||
|
||||
is_stall_mode = (wave_launch_mode == 4);
|
||||
is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4);
|
||||
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
|
||||
VMID_MASK, is_mode_set ? 1 << vmid : 0);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
|
||||
MODE, is_mode_set ? wave_launch_mode : 0);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
|
||||
|
||||
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
|
||||
STALL_VMID, is_stall_mode ? 1 << vmid : 0);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
|
||||
STALL_RA, is_stall_mode ? 1 : 0);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
|
||||
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* kgd_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
|
||||
* The values read are:
|
||||
* ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
|
||||
* atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
|
||||
* wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
|
||||
* gws_wait_time -- Wait Count for Global Wave Syncs.
|
||||
* que_sleep_wait_time -- Wait Count for Dequeue Retry.
|
||||
* sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
|
||||
* sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
|
||||
* deq_retry_wait_time -- Wait Count for Global Wave Syncs.
|
||||
*/
|
||||
void get_iq_wait_times_v10_3(struct amdgpu_device *adev,
|
||||
uint32_t *wait_times)
|
||||
|
||||
{
|
||||
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
|
||||
}
|
||||
|
||||
void build_grace_period_packet_info_v10_3(struct amdgpu_device *adev,
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data)
|
||||
{
|
||||
*reg_data = wait_times;
|
||||
|
||||
*reg_data = REG_SET_FIELD(*reg_data,
|
||||
CP_IQ_WAIT_TIME2,
|
||||
SCH_WAVE,
|
||||
grace_period);
|
||||
|
||||
*reg_offset = mmCP_IQ_WAIT_TIME2;
|
||||
}
|
||||
#endif
|
||||
|
||||
const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
|
||||
.program_sh_mem_settings = program_sh_mem_settings_v10_3,
|
||||
.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v10_3,
|
||||
|
@ -805,12 +672,13 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
|
|||
.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
|
||||
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
|
||||
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
|
||||
#if 0
|
||||
.enable_debug_trap = enable_debug_trap_v10_3,
|
||||
.disable_debug_trap = disable_debug_trap_v10_3,
|
||||
.set_wave_launch_trap_override = set_wave_launch_trap_override_v10_3,
|
||||
.set_wave_launch_mode = set_wave_launch_mode_v10_3,
|
||||
.get_iq_wait_times = get_iq_wait_times_v10_3,
|
||||
.build_grace_period_packet_info = build_grace_period_packet_info_v10_3,
|
||||
#endif
|
||||
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
|
||||
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
|
||||
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
|
||||
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
|
||||
.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
|
||||
.set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
|
||||
.set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
|
||||
.set_address_watch = kgd_gfx_v10_set_address_watch,
|
||||
.clear_address_watch = kgd_gfx_v10_clear_address_watch
|
||||
};
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "soc15d.h"
|
||||
#include "v11_structs.h"
|
||||
#include "soc21.h"
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
|
||||
enum hqd_dequeue_request_type {
|
||||
NO_ACTION = 0,
|
||||
|
@ -78,7 +79,7 @@ static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmi
|
|||
uint32_t sh_mem_config,
|
||||
uint32_t sh_mem_ape1_base,
|
||||
uint32_t sh_mem_ape1_limit,
|
||||
uint32_t sh_mem_bases)
|
||||
uint32_t sh_mem_bases, uint32_t inst)
|
||||
{
|
||||
lock_srbm(adev, 0, 0, 0, vmid);
|
||||
|
||||
|
@ -89,7 +90,7 @@ static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmi
|
|||
}
|
||||
|
||||
static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
|
||||
unsigned int vmid)
|
||||
unsigned int vmid, uint32_t inst)
|
||||
{
|
||||
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
|
||||
|
||||
|
@ -101,7 +102,8 @@ static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int p
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)
|
||||
static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id,
|
||||
uint32_t inst)
|
||||
{
|
||||
uint32_t mec;
|
||||
uint32_t pipe;
|
||||
|
@ -162,7 +164,7 @@ static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
|
|||
static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
|
||||
uint32_t queue_id, uint32_t __user *wptr,
|
||||
uint32_t wptr_shift, uint32_t wptr_mask,
|
||||
struct mm_struct *mm)
|
||||
struct mm_struct *mm, uint32_t inst)
|
||||
{
|
||||
struct v11_compute_mqd *m;
|
||||
uint32_t *mqd_hqd;
|
||||
|
@ -258,9 +260,9 @@ static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
|
|||
|
||||
static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t doorbell_off)
|
||||
uint32_t doorbell_off, uint32_t inst)
|
||||
{
|
||||
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
|
||||
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
|
||||
struct v11_compute_mqd *m;
|
||||
uint32_t mec, pipe;
|
||||
int r;
|
||||
|
@ -275,7 +277,7 @@ static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
|
|||
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
|
||||
mec, pipe, queue_id);
|
||||
|
||||
spin_lock(&adev->gfx.kiq.ring_lock);
|
||||
spin_lock(&adev->gfx.kiq[0].ring_lock);
|
||||
r = amdgpu_ring_alloc(kiq_ring, 7);
|
||||
if (r) {
|
||||
pr_err("Failed to alloc KIQ (%d).\n", r);
|
||||
|
@ -302,7 +304,7 @@ static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
|
|||
amdgpu_ring_commit(kiq_ring);
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&adev->gfx.kiq.ring_lock);
|
||||
spin_unlock(&adev->gfx.kiq[0].ring_lock);
|
||||
release_queue(adev);
|
||||
|
||||
return r;
|
||||
|
@ -310,7 +312,7 @@ out_unlock:
|
|||
|
||||
static int hqd_dump_v11(struct amdgpu_device *adev,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t (**dump)[2], uint32_t *n_regs)
|
||||
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
|
||||
{
|
||||
uint32_t i = 0, reg;
|
||||
#define HQD_N_REGS 56
|
||||
|
@ -445,7 +447,7 @@ static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
|
|||
}
|
||||
|
||||
static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
|
||||
uint32_t pipe_id, uint32_t queue_id)
|
||||
uint32_t pipe_id, uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
uint32_t act;
|
||||
bool retval = false;
|
||||
|
@ -486,7 +488,7 @@ static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
|
|||
static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
|
||||
enum kfd_preempt_type reset_type,
|
||||
unsigned int utimeout, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
enum hqd_dequeue_request_type type;
|
||||
unsigned long end_jiffies;
|
||||
|
@ -571,7 +573,7 @@ static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
|
|||
|
||||
static int wave_control_execute_v11(struct amdgpu_device *adev,
|
||||
uint32_t gfx_index_val,
|
||||
uint32_t sq_cmd)
|
||||
uint32_t sq_cmd, uint32_t inst)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
|
@ -606,6 +608,183 @@ static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev,
|
|||
adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
|
||||
*
|
||||
* restore_dbg_registers is ignored here but is a general interface requirement
|
||||
* for devices that support GFXOFF and where the RLC save/restore list
|
||||
* does not support hw registers for debugging i.e. the driver has to manually
|
||||
* initialize the debug mode registers after it has disabled GFX off during the
|
||||
* debug session.
|
||||
*/
|
||||
static uint32_t kgd_gfx_v11_enable_debug_trap(struct amdgpu_device *adev,
|
||||
bool restore_dbg_registers,
|
||||
uint32_t vmid)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
|
||||
static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
|
||||
bool keep_trap_enabled,
|
||||
uint32_t vmid)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static int kgd_gfx_v11_validate_trap_override_request(struct amdgpu_device *adev,
|
||||
uint32_t trap_override,
|
||||
uint32_t *trap_mask_supported)
|
||||
{
|
||||
*trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
|
||||
KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
|
||||
KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
|
||||
KFD_DBG_TRAP_MASK_FP_OVERFLOW |
|
||||
KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
|
||||
KFD_DBG_TRAP_MASK_FP_INEXACT |
|
||||
KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
|
||||
KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
|
||||
KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
|
||||
|
||||
if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 4))
|
||||
*trap_mask_supported |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
|
||||
KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
|
||||
|
||||
if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
|
||||
trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
|
||||
return -EPERM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
|
||||
{
|
||||
uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
|
||||
uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
|
||||
uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
|
||||
KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
|
||||
KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
|
||||
KFD_DBG_TRAP_MASK_FP_OVERFLOW |
|
||||
KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
|
||||
KFD_DBG_TRAP_MASK_FP_INEXACT |
|
||||
KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
|
||||
KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
|
||||
KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
|
||||
uint32_t ret;
|
||||
|
||||
ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
|
||||
ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
|
||||
ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
|
||||
{
|
||||
uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
|
||||
|
||||
if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
|
||||
ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
|
||||
|
||||
if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
|
||||
ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
|
||||
static uint32_t kgd_gfx_v11_set_wave_launch_trap_override(struct amdgpu_device *adev,
|
||||
uint32_t vmid,
|
||||
uint32_t trap_override,
|
||||
uint32_t trap_mask_bits,
|
||||
uint32_t trap_mask_request,
|
||||
uint32_t *trap_mask_prev,
|
||||
uint32_t kfd_dbg_trap_cntl_prev)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
*trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
|
||||
|
||||
data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request);
|
||||
data = trap_mask_map_sw_to_hw(data);
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static uint32_t kgd_gfx_v11_set_wave_launch_mode(struct amdgpu_device *adev,
|
||||
uint8_t wave_launch_mode,
|
||||
uint32_t vmid)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
|
||||
static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev,
|
||||
uint64_t watch_address,
|
||||
uint32_t watch_address_mask,
|
||||
uint32_t watch_id,
|
||||
uint32_t watch_mode,
|
||||
uint32_t debug_vmid)
|
||||
{
|
||||
uint32_t watch_address_high;
|
||||
uint32_t watch_address_low;
|
||||
uint32_t watch_address_cntl;
|
||||
|
||||
watch_address_cntl = 0;
|
||||
watch_address_low = lower_32_bits(watch_address);
|
||||
watch_address_high = upper_32_bits(watch_address) & 0xffff;
|
||||
|
||||
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
|
||||
TCP_WATCH0_CNTL,
|
||||
MODE,
|
||||
watch_mode);
|
||||
|
||||
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
|
||||
TCP_WATCH0_CNTL,
|
||||
MASK,
|
||||
watch_address_mask >> 7);
|
||||
|
||||
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
|
||||
TCP_WATCH0_CNTL,
|
||||
VALID,
|
||||
1);
|
||||
|
||||
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
|
||||
(watch_id * TCP_WATCH_STRIDE)),
|
||||
watch_address_high);
|
||||
|
||||
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
|
||||
(watch_id * TCP_WATCH_STRIDE)),
|
||||
watch_address_low);
|
||||
|
||||
return watch_address_cntl;
|
||||
}
|
||||
|
||||
static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev,
|
||||
uint32_t watch_id)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
|
||||
.program_sh_mem_settings = program_sh_mem_settings_v11,
|
||||
.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
|
||||
|
@ -622,4 +801,11 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
|
|||
.wave_control_execute = wave_control_execute_v11,
|
||||
.get_atc_vmid_pasid_mapping_info = NULL,
|
||||
.set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
|
||||
.enable_debug_trap = kgd_gfx_v11_enable_debug_trap,
|
||||
.disable_debug_trap = kgd_gfx_v11_disable_debug_trap,
|
||||
.validate_trap_override_request = kgd_gfx_v11_validate_trap_override_request,
|
||||
.set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override,
|
||||
.set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode,
|
||||
.set_address_watch = kgd_gfx_v11_set_address_watch,
|
||||
.clear_address_watch = kgd_gfx_v11_clear_address_watch
|
||||
};
|
||||
|
|
|
@ -78,7 +78,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
|
|||
uint32_t sh_mem_config,
|
||||
uint32_t sh_mem_ape1_base,
|
||||
uint32_t sh_mem_ape1_limit,
|
||||
uint32_t sh_mem_bases)
|
||||
uint32_t sh_mem_bases, uint32_t inst)
|
||||
{
|
||||
lock_srbm(adev, 0, 0, 0, vmid);
|
||||
|
||||
|
@ -91,7 +91,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
|
|||
}
|
||||
|
||||
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
|
||||
unsigned int vmid)
|
||||
unsigned int vmid, uint32_t inst)
|
||||
{
|
||||
/*
|
||||
* We have to assume that there is no outstanding mapping.
|
||||
|
@ -114,7 +114,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
|
||||
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
|
||||
uint32_t inst)
|
||||
{
|
||||
uint32_t mec;
|
||||
uint32_t pipe;
|
||||
|
@ -158,7 +159,7 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
|
|||
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t __user *wptr, uint32_t wptr_shift,
|
||||
uint32_t wptr_mask, struct mm_struct *mm)
|
||||
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
|
||||
{
|
||||
struct cik_mqd *m;
|
||||
uint32_t *mqd_hqd;
|
||||
|
@ -202,7 +203,7 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
|
|||
|
||||
static int kgd_hqd_dump(struct amdgpu_device *adev,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t (**dump)[2], uint32_t *n_regs)
|
||||
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
|
||||
{
|
||||
uint32_t i = 0, reg;
|
||||
#define HQD_N_REGS (35+4)
|
||||
|
@ -318,7 +319,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
|
|||
|
||||
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
|
||||
uint64_t queue_address, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
uint32_t act;
|
||||
bool retval = false;
|
||||
|
@ -358,7 +359,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
|
|||
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
|
||||
enum kfd_preempt_type reset_type,
|
||||
unsigned int utimeout, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
uint32_t temp;
|
||||
enum hqd_dequeue_request_type type;
|
||||
|
@ -494,7 +495,7 @@ static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
|
|||
|
||||
static int kgd_wave_control_execute(struct amdgpu_device *adev,
|
||||
uint32_t gfx_index_val,
|
||||
uint32_t sq_cmd)
|
||||
uint32_t sq_cmd, uint32_t inst)
|
||||
{
|
||||
uint32_t data;
|
||||
|
||||
|
|
|
@ -72,7 +72,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
|
|||
uint32_t sh_mem_config,
|
||||
uint32_t sh_mem_ape1_base,
|
||||
uint32_t sh_mem_ape1_limit,
|
||||
uint32_t sh_mem_bases)
|
||||
uint32_t sh_mem_bases, uint32_t inst)
|
||||
{
|
||||
lock_srbm(adev, 0, 0, 0, vmid);
|
||||
|
||||
|
@ -85,7 +85,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
|
|||
}
|
||||
|
||||
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
|
||||
unsigned int vmid)
|
||||
unsigned int vmid, uint32_t inst)
|
||||
{
|
||||
/*
|
||||
* We have to assume that there is no outstanding mapping.
|
||||
|
@ -109,7 +109,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
|
||||
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
|
||||
uint32_t inst)
|
||||
{
|
||||
uint32_t mec;
|
||||
uint32_t pipe;
|
||||
|
@ -153,7 +154,7 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
|
|||
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t __user *wptr, uint32_t wptr_shift,
|
||||
uint32_t wptr_mask, struct mm_struct *mm)
|
||||
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
|
||||
{
|
||||
struct vi_mqd *m;
|
||||
uint32_t *mqd_hqd;
|
||||
|
@ -226,7 +227,7 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
|
|||
|
||||
static int kgd_hqd_dump(struct amdgpu_device *adev,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t (**dump)[2], uint32_t *n_regs)
|
||||
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
|
||||
{
|
||||
uint32_t i = 0, reg;
|
||||
#define HQD_N_REGS (54+4)
|
||||
|
@ -350,7 +351,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
|
|||
|
||||
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
|
||||
uint64_t queue_address, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
uint32_t act;
|
||||
bool retval = false;
|
||||
|
@ -390,7 +391,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
|
|||
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
|
||||
enum kfd_preempt_type reset_type,
|
||||
unsigned int utimeout, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
uint32_t temp;
|
||||
enum hqd_dequeue_request_type type;
|
||||
|
@ -540,7 +541,7 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
|
|||
|
||||
static int kgd_wave_control_execute(struct amdgpu_device *adev,
|
||||
uint32_t gfx_index_val,
|
||||
uint32_t sq_cmd)
|
||||
uint32_t sq_cmd, uint32_t inst)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include "soc15d.h"
|
||||
#include "gfx_v9_0.h"
|
||||
#include "amdgpu_amdkfd_gfx_v9.h"
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
|
||||
enum hqd_dequeue_request_type {
|
||||
NO_ACTION = 0,
|
||||
|
@ -46,29 +47,29 @@ enum hqd_dequeue_request_type {
|
|||
SAVE_WAVES
|
||||
};
|
||||
|
||||
static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
|
||||
uint32_t queue, uint32_t vmid)
|
||||
static void kgd_gfx_v9_lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
|
||||
uint32_t queue, uint32_t vmid, uint32_t inst)
|
||||
{
|
||||
mutex_lock(&adev->srbm_mutex);
|
||||
soc15_grbm_select(adev, mec, pipe, queue, vmid);
|
||||
soc15_grbm_select(adev, mec, pipe, queue, vmid, GET_INST(GC, inst));
|
||||
}
|
||||
|
||||
static void unlock_srbm(struct amdgpu_device *adev)
|
||||
static void kgd_gfx_v9_unlock_srbm(struct amdgpu_device *adev, uint32_t inst)
|
||||
{
|
||||
soc15_grbm_select(adev, 0, 0, 0, 0);
|
||||
soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
|
||||
mutex_unlock(&adev->srbm_mutex);
|
||||
}
|
||||
|
||||
static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
|
||||
uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
|
||||
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
|
||||
|
||||
lock_srbm(adev, mec, pipe, queue_id, 0);
|
||||
kgd_gfx_v9_lock_srbm(adev, mec, pipe, queue_id, 0, inst);
|
||||
}
|
||||
|
||||
static uint64_t get_queue_mask(struct amdgpu_device *adev,
|
||||
uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
|
||||
uint32_t pipe_id, uint32_t queue_id)
|
||||
{
|
||||
unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
|
||||
|
@ -77,28 +78,28 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev,
|
|||
return 1ull << bit;
|
||||
}
|
||||
|
||||
static void release_queue(struct amdgpu_device *adev)
|
||||
void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst)
|
||||
{
|
||||
unlock_srbm(adev);
|
||||
kgd_gfx_v9_unlock_srbm(adev, inst);
|
||||
}
|
||||
|
||||
void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
|
||||
uint32_t sh_mem_config,
|
||||
uint32_t sh_mem_ape1_base,
|
||||
uint32_t sh_mem_ape1_limit,
|
||||
uint32_t sh_mem_bases)
|
||||
uint32_t sh_mem_bases, uint32_t inst)
|
||||
{
|
||||
lock_srbm(adev, 0, 0, 0, vmid);
|
||||
kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
|
||||
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases);
|
||||
/* APE1 no longer exists on GFX9 */
|
||||
|
||||
unlock_srbm(adev);
|
||||
kgd_gfx_v9_unlock_srbm(adev, inst);
|
||||
}
|
||||
|
||||
int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
|
||||
unsigned int vmid)
|
||||
unsigned int vmid, uint32_t inst)
|
||||
{
|
||||
/*
|
||||
* We have to assume that there is no outstanding mapping.
|
||||
|
@ -156,7 +157,8 @@ int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
|
|||
* but still works
|
||||
*/
|
||||
|
||||
int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
|
||||
int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
|
||||
uint32_t inst)
|
||||
{
|
||||
uint32_t mec;
|
||||
uint32_t pipe;
|
||||
|
@ -164,13 +166,13 @@ int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
|
|||
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
|
||||
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
|
||||
|
||||
lock_srbm(adev, mec, pipe, 0, 0);
|
||||
kgd_gfx_v9_lock_srbm(adev, mec, pipe, 0, 0, inst);
|
||||
|
||||
WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
|
||||
WREG32_SOC15(GC, GET_INST(GC, inst), mmCPC_INT_CNTL,
|
||||
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
|
||||
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
|
||||
|
||||
unlock_srbm(adev);
|
||||
kgd_gfx_v9_unlock_srbm(adev, inst);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -220,7 +222,8 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
|
|||
int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t __user *wptr, uint32_t wptr_shift,
|
||||
uint32_t wptr_mask, struct mm_struct *mm)
|
||||
uint32_t wptr_mask, struct mm_struct *mm,
|
||||
uint32_t inst)
|
||||
{
|
||||
struct v9_mqd *m;
|
||||
uint32_t *mqd_hqd;
|
||||
|
@ -228,21 +231,22 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
|
|||
|
||||
m = get_mqd(mqd);
|
||||
|
||||
acquire_queue(adev, pipe_id, queue_id);
|
||||
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
|
||||
|
||||
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
|
||||
mqd_hqd = &m->cp_mqd_base_addr_lo;
|
||||
hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
|
||||
hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
|
||||
|
||||
for (reg = hqd_base;
|
||||
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
|
||||
reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
|
||||
WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
|
||||
|
||||
|
||||
/* Activate doorbell logic before triggering WPTR poll. */
|
||||
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
|
||||
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL),
|
||||
data);
|
||||
|
||||
if (wptr) {
|
||||
/* Don't read wptr with get_user because the user
|
||||
|
@ -271,43 +275,43 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
|
|||
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
|
||||
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
|
||||
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO),
|
||||
lower_32_bits(guessed_wptr));
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI),
|
||||
upper_32_bits(guessed_wptr));
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR),
|
||||
lower_32_bits((uintptr_t)wptr));
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
|
||||
upper_32_bits((uintptr_t)wptr));
|
||||
WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
|
||||
(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
|
||||
WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1,
|
||||
(uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
|
||||
}
|
||||
|
||||
/* Start the EOP fetcher */
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR),
|
||||
REG_SET_FIELD(m->cp_hqd_eop_rptr,
|
||||
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
|
||||
|
||||
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data);
|
||||
|
||||
release_queue(adev);
|
||||
kgd_gfx_v9_release_queue(adev, inst);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t doorbell_off)
|
||||
uint32_t doorbell_off, uint32_t inst)
|
||||
{
|
||||
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
|
||||
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[inst].ring;
|
||||
struct v9_mqd *m;
|
||||
uint32_t mec, pipe;
|
||||
int r;
|
||||
|
||||
m = get_mqd(mqd);
|
||||
|
||||
acquire_queue(adev, pipe_id, queue_id);
|
||||
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
|
||||
|
||||
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
|
||||
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
|
||||
|
@ -315,7 +319,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
|
|||
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
|
||||
mec, pipe, queue_id);
|
||||
|
||||
spin_lock(&adev->gfx.kiq.ring_lock);
|
||||
spin_lock(&adev->gfx.kiq[inst].ring_lock);
|
||||
r = amdgpu_ring_alloc(kiq_ring, 7);
|
||||
if (r) {
|
||||
pr_err("Failed to alloc KIQ (%d).\n", r);
|
||||
|
@ -342,15 +346,15 @@ int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
|
|||
amdgpu_ring_commit(kiq_ring);
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&adev->gfx.kiq.ring_lock);
|
||||
release_queue(adev);
|
||||
spin_unlock(&adev->gfx.kiq[inst].ring_lock);
|
||||
kgd_gfx_v9_release_queue(adev, inst);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t (**dump)[2], uint32_t *n_regs)
|
||||
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
|
||||
{
|
||||
uint32_t i = 0, reg;
|
||||
#define HQD_N_REGS 56
|
||||
|
@ -365,13 +369,13 @@ int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
|
|||
if (*dump == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
acquire_queue(adev, pipe_id, queue_id);
|
||||
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
|
||||
|
||||
for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
|
||||
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
|
||||
for (reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
|
||||
reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
|
||||
DUMP_REG(reg);
|
||||
|
||||
release_queue(adev);
|
||||
kgd_gfx_v9_release_queue(adev, inst);
|
||||
|
||||
WARN_ON_ONCE(i != HQD_N_REGS);
|
||||
*n_regs = i;
|
||||
|
@ -481,23 +485,23 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
|
|||
|
||||
bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
|
||||
uint64_t queue_address, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
uint32_t act;
|
||||
bool retval = false;
|
||||
uint32_t low, high;
|
||||
|
||||
acquire_queue(adev, pipe_id, queue_id);
|
||||
act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
|
||||
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
|
||||
act = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
|
||||
if (act) {
|
||||
low = lower_32_bits(queue_address >> 8);
|
||||
high = upper_32_bits(queue_address >> 8);
|
||||
|
||||
if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
|
||||
high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
|
||||
if (low == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE) &&
|
||||
high == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI))
|
||||
retval = true;
|
||||
}
|
||||
release_queue(adev);
|
||||
kgd_gfx_v9_release_queue(adev, inst);
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -522,7 +526,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
|
|||
int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
|
||||
enum kfd_preempt_type reset_type,
|
||||
unsigned int utimeout, uint32_t pipe_id,
|
||||
uint32_t queue_id)
|
||||
uint32_t queue_id, uint32_t inst)
|
||||
{
|
||||
enum hqd_dequeue_request_type type;
|
||||
unsigned long end_jiffies;
|
||||
|
@ -532,10 +536,10 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
|
|||
if (amdgpu_in_reset(adev))
|
||||
return -EIO;
|
||||
|
||||
acquire_queue(adev, pipe_id, queue_id);
|
||||
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
|
||||
|
||||
if (m->cp_hqd_vmid == 0)
|
||||
WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
|
||||
WREG32_FIELD15_RLC(GC, GET_INST(GC, inst), RLC_CP_SCHEDULERS, scheduler1, 0);
|
||||
|
||||
switch (reset_type) {
|
||||
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
|
||||
|
@ -552,22 +556,22 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
|
|||
break;
|
||||
}
|
||||
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type);
|
||||
|
||||
end_jiffies = (utimeout * HZ / 1000) + jiffies;
|
||||
while (true) {
|
||||
temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
|
||||
temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
|
||||
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
|
||||
break;
|
||||
if (time_after(jiffies, end_jiffies)) {
|
||||
pr_err("cp queue preemption time out.\n");
|
||||
release_queue(adev);
|
||||
kgd_gfx_v9_release_queue(adev, inst);
|
||||
return -ETIME;
|
||||
}
|
||||
usleep_range(500, 1000);
|
||||
}
|
||||
|
||||
release_queue(adev);
|
||||
kgd_gfx_v9_release_queue(adev, inst);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -624,14 +628,14 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
|
|||
|
||||
int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
|
||||
uint32_t gfx_index_val,
|
||||
uint32_t sq_cmd)
|
||||
uint32_t sq_cmd, uint32_t inst)
|
||||
{
|
||||
uint32_t data = 0;
|
||||
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
|
||||
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
|
||||
WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);
|
||||
WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, gfx_index_val);
|
||||
WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_CMD, sq_cmd);
|
||||
|
||||
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
|
||||
INSTANCE_BROADCAST_WRITES, 1);
|
||||
|
@ -640,12 +644,271 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
|
|||
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
|
||||
SE_BROADCAST_WRITES, 1);
|
||||
|
||||
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
|
||||
WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, data);
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * GFX9 helper for wave launch stall requirements on debug trap setting.
 *
 * vmid:
 *   Target VMID to stall/unstall.
 *
 * stall:
 *   0-unstall wave launch (enable), 1-stall wave launch (disable).
 *   After wavefront launch has been stalled, allocated waves must drain from
 *   SPI in order for debug trap settings to take effect on those waves.
 *   This is roughly a ~96 clock cycle wait on SPI where a read on
 *   SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
 *   KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
 *
 *   NOTE: We can afford to clear the entire STALL_VMID field on unstall
 *   because GFX9.4.1 cannot support multi-process debugging due to trap
 *   configuration and masking being limited to global scope. Always assume
 *   single process conditions.
 */
#define KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY	3
void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
					uint32_t vmid,
					bool stall)
{
	int i;
	uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));

	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
		data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
							stall ? 1 << vmid : 0);
	else
		data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA,
							stall ? 1 : 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);

	if (!stall)
		return;

	for (i = 0; i < KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
		RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
}

/*
 * restore_dbg_registers is ignored here but is a general interface requirement
 * for devices that support GFXOFF and where the RLC save/restore list
 * does not support hw registers for debugging i.e. the driver has to manually
 * initialize the debug mode registers after it has disabled GFX off during the
 * debug session.
 */
uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
				bool restore_dbg_registers,
				uint32_t vmid)
{
	mutex_lock(&adev->grbm_idx_mutex);

	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);

	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

/*
 * keep_trap_enabled is ignored here but is a general interface requirement
 * for devices that support multi-process debugging where the performance
 * overhead from trap temporary setup needs to be bypassed when the debug
 * session has ended.
 */
uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
					bool keep_trap_enabled,
					uint32_t vmid)
{
	mutex_lock(&adev->grbm_idx_mutex);

	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);

	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

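For orientation, here is a hypothetical caller-side sketch of how the two entry points above pair up over a debug session; the attach/detach flow is assumed, not taken from this patch, and error handling is omitted.

/* Assumed call order, not from this patch: bracket a debug session for
 * the process currently mapped to "vmid" on GFX9.
 */
kgd_gfx_v9_enable_debug_trap(adev, /* restore_dbg_registers */ false, vmid);

/* ... debugger work: address watches, wave launch overrides, etc. ... */

kgd_gfx_v9_disable_debug_trap(adev, /* keep_trap_enabled */ false, vmid);
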
int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
					uint32_t trap_override,
					uint32_t *trap_mask_supported)
{
	*trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;

	/* The SPI_GDBG_TRAP_MASK register is global and affects all
	 * processes. Only allow OR-ing the address-watch bit, since
	 * this only affects processes under the debugger. Other bits
	 * should stay 0 to avoid the debugger interfering with other
	 * processes.
	 */
	if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
		return -EINVAL;

	return 0;
}

uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
					uint32_t vmid,
					uint32_t trap_override,
					uint32_t trap_mask_bits,
					uint32_t trap_mask_request,
					uint32_t *trap_mask_prev,
					uint32_t kfd_dbg_cntl_prev)
{
	uint32_t data, wave_cntl_prev;

	mutex_lock(&adev->grbm_idx_mutex);

	wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));

	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);

	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
	*trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);

	trap_mask_bits = (trap_mask_bits & trap_mask_request) |
		(*trap_mask_prev & ~trap_mask_request);

	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);

	/* We need to preserve wave launch mode stall settings. */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
					uint8_t wave_launch_mode,
					uint32_t vmid)
{
	uint32_t data = 0;
	bool is_mode_set = !!wave_launch_mode;

	mutex_lock(&adev->grbm_idx_mutex);

	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);

	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
		VMID_MASK, is_mode_set ? 1 << vmid : 0);
	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
		MODE, is_mode_set ? wave_launch_mode : 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);

	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

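The EXCP_EN update in kgd_gfx_v9_set_wave_launch_trap_override() only replaces the bit positions named in trap_mask_request and keeps the rest of the previous mask. A tiny worked example with assumed bit patterns:

/* Assumed values for illustration only. */
uint32_t prev    = 0xA;	/* 0b1010: previously enabled exceptions */
uint32_t request = 0x6;	/* 0b0110: bits the debugger wants to control */
uint32_t bits    = 0x4;	/* 0b0100: new values for the requested bits */

uint32_t merged = (bits & request) | (prev & ~request);	/* 0b1100 = 0xC */
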
#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
					uint64_t watch_address,
					uint32_t watch_address_mask,
					uint32_t watch_id,
					uint32_t watch_mode,
					uint32_t debug_vmid)
{
	uint32_t watch_address_high;
	uint32_t watch_address_low;
	uint32_t watch_address_cntl;

	watch_address_cntl = 0;

	watch_address_low = lower_32_bits(watch_address);
	watch_address_high = upper_32_bits(watch_address) & 0xffff;

	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
			TCP_WATCH0_CNTL,
			VMID,
			debug_vmid);
	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
			TCP_WATCH0_CNTL,
			MODE,
			watch_mode);
	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
			TCP_WATCH0_CNTL,
			MASK,
			watch_address_mask >> 6);

	/* Turning off this watch point until we set all the registers */
	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
			TCP_WATCH0_CNTL,
			VALID,
			0);

	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
			(watch_id * TCP_WATCH_STRIDE)),
			watch_address_cntl);

	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
			(watch_id * TCP_WATCH_STRIDE)),
			watch_address_high);

	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
			(watch_id * TCP_WATCH_STRIDE)),
			watch_address_low);

	/* Enable the watch point */
	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
			TCP_WATCH0_CNTL,
			VALID,
			1);

	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
			(watch_id * TCP_WATCH_STRIDE)),
			watch_address_cntl);

	return 0;
}

uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
					uint32_t watch_id)
{
	uint32_t watch_address_cntl;

	watch_address_cntl = 0;

	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
			(watch_id * TCP_WATCH_STRIDE)),
			watch_address_cntl);

	return 0;
}

/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
 * The values read are:
 *     ib_offload_wait_time     -- Wait Count for Indirect Buffer Offloads.
 *     atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
 *     wrm_offload_wait_time    -- Wait Count for WAIT_REG_MEM Offloads.
 *     gws_wait_time            -- Wait Count for Global Wave Syncs.
 *     que_sleep_wait_time      -- Wait Count for Dequeue Retry.
 *     sch_wave_wait_time       -- Wait Count for Scheduling Wave Message.
 *     sem_rearm_wait_time      -- Wait Count for Semaphore re-arm.
 *     deq_retry_wait_time      -- Wait Count for Global Wave Syncs.
 */
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
					uint32_t *wait_times)

{
	*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
}
|
||||
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
|
||||
uint32_t vmid, uint64_t page_table_base)
|
||||
{
|
||||
|
@ -682,10 +945,11 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
|
|||
* @queue_idx: Index of queue in the queue-map bit-field
|
||||
* @wave_cnt: Output parameter updated with number of waves in flight
|
||||
* @vmid: Output parameter updated with VMID of queue whose wave count
|
||||
* is being collected
|
||||
* is being collected
|
||||
* @inst: xcc's instance number on a multi-XCC setup
|
||||
*/
|
||||
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
|
||||
int *wave_cnt, int *vmid)
|
||||
int *wave_cnt, int *vmid, uint32_t inst)
|
||||
{
|
||||
int pipe_idx;
|
||||
int queue_slot;
|
||||
|
@ -700,12 +964,12 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
|
|||
*wave_cnt = 0;
|
||||
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
|
||||
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
|
||||
soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
|
||||
reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
|
||||
soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
|
||||
reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
|
||||
queue_slot);
|
||||
*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
|
||||
if (*wave_cnt != 0)
|
||||
*vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) &
|
||||
*vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) &
|
||||
CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
|
||||
}
|
||||
|
||||
|
@ -718,9 +982,10 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
|
|||
* @adev: Handle of device from which to get number of waves in flight
|
||||
* @pasid: Identifies the process for which this query call is invoked
|
||||
* @pasid_wave_cnt: Output parameter updated with number of waves in flight that
|
||||
* belong to process with given pasid
|
||||
* belong to process with given pasid
|
||||
* @max_waves_per_cu: Output parameter updated with maximum number of waves
|
||||
* possible per Compute Unit
|
||||
* possible per Compute Unit
|
||||
* @inst: xcc's instance number on a multi-XCC setup
|
||||
*
|
||||
* Note: It's possible that the device has too many queues (oversubscription)
|
||||
* in which case a VMID could be remapped to a different PASID. This could lead
|
||||
|
@ -756,7 +1021,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
|
|||
* Reading registers referenced above involves programming GRBM appropriately
|
||||
*/
|
||||
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
|
||||
int *pasid_wave_cnt, int *max_waves_per_cu)
|
||||
int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst)
|
||||
{
|
||||
int qidx;
|
||||
int vmid;
|
||||
|
@ -772,13 +1037,13 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
|
|||
DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);
|
||||
|
||||
lock_spi_csq_mutexes(adev);
|
||||
soc15_grbm_select(adev, 1, 0, 0, 0);
|
||||
soc15_grbm_select(adev, 1, 0, 0, 0, inst);
|
||||
|
||||
/*
|
||||
* Iterate through the shader engines and arrays of the device
|
||||
* to get number of waves in flight
|
||||
*/
|
||||
bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap,
|
||||
bitmap_complement(cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap,
|
||||
KGD_MAX_QUEUES);
|
||||
max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
|
||||
adev->gfx.mec.num_queue_per_pipe;
|
||||
|
@ -787,8 +1052,8 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
|
|||
for (se_idx = 0; se_idx < se_cnt; se_idx++) {
|
||||
for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
|
||||
|
||||
amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
|
||||
queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS);
|
||||
amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst);
|
||||
queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
|
||||
|
||||
/*
|
||||
* Assumption: queue map encodes following schema: four
|
||||
|
@ -808,10 +1073,11 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
|
|||
continue;
|
||||
|
||||
/* Get number of waves in flight and aggregate them */
|
||||
get_wave_count(adev, qidx, &wave_cnt, &vmid);
|
||||
get_wave_count(adev, qidx, &wave_cnt, &vmid,
|
||||
inst);
|
||||
if (wave_cnt != 0) {
|
||||
pasid_tmp =
|
||||
RREG32(SOC15_REG_OFFSET(OSSSYS, 0,
|
||||
RREG32(SOC15_REG_OFFSET(OSSSYS, inst,
|
||||
mmIH_VMID_0_LUT) + vmid);
|
||||
if (pasid_tmp == pasid)
|
||||
vmid_wave_cnt += wave_cnt;
|
||||
|
@ -820,8 +1086,8 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
|
|||
}
|
||||
}
|
||||
|
||||
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
|
||||
soc15_grbm_select(adev, 0, 0, 0, 0);
|
||||
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
|
||||
soc15_grbm_select(adev, 0, 0, 0, 0, inst);
|
||||
unlock_spi_csq_mutexes(adev);
|
||||
|
||||
/* Update the output parameters and return */
|
||||
|
@ -830,28 +1096,51 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
|
|||
adev->gfx.cu_info.max_waves_per_simd;
|
||||
}
|
||||
|
||||
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
		uint32_t wait_times,
		uint32_t grace_period,
		uint32_t *reg_offset,
		uint32_t *reg_data)
{
	lock_srbm(adev, 0, 0, 0, vmid);
	*reg_data = wait_times;

	/*
	 * The CP cannot handle a 0 grace period input and would end up
	 * setting an infinite grace period, so set it to 1 to prevent this.
	 */
	if (grace_period == 0)
		grace_period = 1;

	*reg_data = REG_SET_FIELD(*reg_data,
			CP_IQ_WAIT_TIME2,
			SCH_WAVE,
			grace_period);

	*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
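A hedged usage sketch tying the two helpers together: read the current wait times once, then build the register value and offset that a preemption packet would carry. The grace-period value and the surrounding flow are assumptions for illustration, not code from this patch.

uint32_t wait_times, reg_offset, reg_data;

kgd_gfx_v9_get_iq_wait_times(adev, &wait_times);
kgd_gfx_v9_build_grace_period_packet_info(adev, wait_times,
					  100 /* assumed grace period */,
					  &reg_offset, &reg_data);
/* reg_data now holds CP_IQ_WAIT_TIME2 with SCH_WAVE set to 100;
 * reg_offset identifies the register a scheduler packet would update.
 */
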
|
||||
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
|
||||
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst)
|
||||
{
|
||||
kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
|
||||
|
||||
/*
|
||||
* Program TBA registers
|
||||
*/
|
||||
WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO,
|
||||
WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_LO,
|
||||
lower_32_bits(tba_addr >> 8));
|
||||
WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_HI,
|
||||
WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_HI,
|
||||
upper_32_bits(tba_addr >> 8));
|
||||
|
||||
/*
|
||||
* Program TMA registers
|
||||
*/
|
||||
WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_LO,
|
||||
WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_LO,
|
||||
lower_32_bits(tma_addr >> 8));
|
||||
WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_HI,
|
||||
WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_HI,
|
||||
upper_32_bits(tma_addr >> 8));
|
||||
|
||||
unlock_srbm(adev);
|
||||
kgd_gfx_v9_unlock_srbm(adev, inst);
|
||||
}
|
||||
|
||||
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
|
||||
|
@ -871,6 +1160,15 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
|
|||
.get_atc_vmid_pasid_mapping_info =
|
||||
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
|
||||
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
|
||||
.enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
|
||||
.disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
|
||||
.validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
|
||||
.set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
|
||||
.set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
|
||||
.set_address_watch = kgd_gfx_v9_set_address_watch,
|
||||
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
|
||||
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
|
||||
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
|
||||
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
|
||||
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
|
||||
};
|
||||
|
|
|
@ -20,41 +20,81 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
|
||||
uint32_t sh_mem_config,
|
||||
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
|
||||
uint32_t sh_mem_bases);
|
||||
uint32_t sh_mem_bases, uint32_t inst);
|
||||
int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
|
||||
unsigned int vmid);
|
||||
int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id);
|
||||
unsigned int vmid, uint32_t inst);
|
||||
int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
|
||||
uint32_t inst);
|
||||
int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
|
||||
uint32_t queue_id, uint32_t __user *wptr,
|
||||
uint32_t wptr_shift, uint32_t wptr_mask,
|
||||
struct mm_struct *mm);
|
||||
struct mm_struct *mm, uint32_t inst);
|
||||
int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t doorbell_off);
|
||||
uint32_t doorbell_off, uint32_t inst);
|
||||
int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
uint32_t (**dump)[2], uint32_t *n_regs);
|
||||
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst);
|
||||
bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
|
||||
uint64_t queue_address, uint32_t pipe_id,
|
||||
uint32_t queue_id);
|
||||
uint32_t queue_id, uint32_t inst);
|
||||
int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
|
||||
enum kfd_preempt_type reset_type,
|
||||
unsigned int utimeout, uint32_t pipe_id,
|
||||
uint32_t queue_id);
|
||||
uint32_t queue_id, uint32_t inst);
|
||||
int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
|
||||
uint32_t gfx_index_val,
|
||||
uint32_t sq_cmd);
|
||||
uint32_t sq_cmd, uint32_t inst);
|
||||
bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
|
||||
uint8_t vmid, uint16_t *p_pasid);
|
||||
|
||||
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
|
||||
uint32_t vmid, uint64_t page_table_base);
|
||||
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
|
||||
int *pasid_wave_cnt, int *max_waves_per_cu);
|
||||
int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst);
|
||||
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
|
||||
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
|
||||
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
|
||||
uint32_t inst);
|
||||
void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
|
||||
uint32_t queue_id, uint32_t inst);
|
||||
uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
|
||||
uint32_t pipe_id, uint32_t queue_id);
|
||||
void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst);
|
||||
void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
|
||||
uint32_t vmid,
|
||||
bool stall);
|
||||
uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
|
||||
bool restore_dbg_registers,
|
||||
uint32_t vmid);
|
||||
uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
|
||||
bool keep_trap_enabled,
|
||||
uint32_t vmid);
|
||||
int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
|
||||
uint32_t trap_override,
|
||||
uint32_t *trap_mask_supported);
|
||||
uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
|
||||
uint8_t wave_launch_mode,
|
||||
uint32_t vmid);
|
||||
uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
|
||||
uint32_t vmid,
|
||||
uint32_t trap_override,
|
||||
uint32_t trap_mask_bits,
|
||||
uint32_t trap_mask_request,
|
||||
uint32_t *trap_mask_prev,
|
||||
uint32_t kfd_dbg_trap_cntl_prev);
|
||||
uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
|
||||
uint64_t watch_address,
|
||||
uint32_t watch_address_mask,
|
||||
uint32_t watch_id,
|
||||
uint32_t watch_mode,
|
||||
uint32_t debug_vmid);
|
||||
uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
|
||||
uint32_t watch_id);
|
||||
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
|
||||
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data);
|
||||
|
|
|
@ -35,7 +35,9 @@
|
|||
#include "amdgpu_dma_buf.h"
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
#include "amdgpu_xgmi.h"
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_smi_events.h"
|
||||
#include <drm/ttm/ttm_tt.h>
|
||||
|
||||
/* Userptr restore delay, just long enough to allow consecutive VM
|
||||
* changes to accumulate
|
||||
|
@@ -110,13 +112,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
	struct sysinfo si;
	uint64_t mem;

	if (kfd_mem_limit.max_system_mem_limit)
		return;

	si_meminfo(&si);
	mem = si.freeram - si.freehigh;
	mem *= si.mem_unit;

	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
	kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
	kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
	kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
		(kfd_mem_limit.max_system_mem_limit >> 20),
		(kfd_mem_limit.max_ttm_mem_limit >> 20));
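A quick arithmetic sketch of the limits computed above, using an assumed 32 GiB of usable low memory: the KFD system-memory cap is 15/16 of that, while the TTM cap is now queried from TTM itself instead of the old (mem/2 - mem/8) heuristic.

/* Assumed figures for illustration only. */
uint64_t mem = 32ULL << 30;				/* si.freeram - si.freehigh, scaled by mem_unit */
uint64_t sys_limit = mem - (mem >> 4);			/* 32 GiB - 2 GiB = 30 GiB */
uint64_t ttm_limit = ttm_tt_pages_limit() << PAGE_SHIFT;	/* taken from TTM */
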
|
@ -148,16 +153,20 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
|
|||
* @size: Size of buffer, in bytes, encapsulated by B0. This should be
|
||||
* equivalent to amdgpu_bo_size(BO)
|
||||
* @alloc_flag: Flag used in allocating a BO as noted above
|
||||
* @xcp_id: xcp_id is used to get xcp from xcp manager, one xcp is
|
||||
* managed as one compute node in driver for app
|
||||
*
|
||||
* Return: returns -ENOMEM in case of error, ZERO otherwise
|
||||
* Return:
|
||||
* returns -ENOMEM in case of error, ZERO otherwise
|
||||
*/
|
||||
int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 alloc_flag)
|
||||
uint64_t size, u32 alloc_flag, int8_t xcp_id)
|
||||
{
|
||||
uint64_t reserved_for_pt =
|
||||
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
|
||||
size_t system_mem_needed, ttm_mem_needed, vram_needed;
|
||||
int ret = 0;
|
||||
uint64_t vram_size = 0;
|
||||
|
||||
system_mem_needed = 0;
|
||||
ttm_mem_needed = 0;
|
||||
|
@ -172,6 +181,17 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
|||
* 2M BO chunk.
|
||||
*/
|
||||
vram_needed = size;
|
||||
/*
|
||||
* For GFX 9.4.3, get the VRAM size from XCP structs
|
||||
*/
|
||||
if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
|
||||
return -EINVAL;
|
||||
|
||||
vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
|
||||
if (adev->gmc.is_app_apu) {
|
||||
system_mem_needed = size;
|
||||
ttm_mem_needed = size;
|
||||
}
|
||||
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
|
||||
system_mem_needed = size;
|
||||
} else if (!(alloc_flag &
|
||||
|
@ -191,8 +211,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
|||
kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
|
||||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
|
||||
kfd_mem_limit.max_ttm_mem_limit) ||
|
||||
(adev && adev->kfd.vram_used + vram_needed >
|
||||
adev->gmc.real_vram_size - reserved_for_pt)) {
|
||||
(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
|
||||
vram_size - reserved_for_pt)) {
|
||||
ret = -ENOMEM;
|
||||
goto release;
|
||||
}
|
||||
|
@ -202,9 +222,11 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
|||
*/
|
||||
WARN_ONCE(vram_needed && !adev,
|
||||
"adev reference can't be null when vram is used");
|
||||
if (adev) {
|
||||
adev->kfd.vram_used += vram_needed;
|
||||
adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
|
||||
if (adev && xcp_id >= 0) {
|
||||
adev->kfd.vram_used[xcp_id] += vram_needed;
|
||||
adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
|
||||
vram_needed :
|
||||
ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
|
||||
}
|
||||
kfd_mem_limit.system_mem_used += system_mem_needed;
|
||||
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
|
||||
|
@ -215,7 +237,7 @@ release:
|
|||
}
|
||||
|
||||
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 alloc_flag)
|
||||
uint64_t size, u32 alloc_flag, int8_t xcp_id)
|
||||
{
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
|
||||
|
@ -225,9 +247,19 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
|
|||
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
|
||||
WARN_ONCE(!adev,
|
||||
"adev reference can't be null when alloc mem flags vram is set");
|
||||
if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
|
||||
goto release;
|
||||
|
||||
if (adev) {
|
||||
adev->kfd.vram_used -= size;
|
||||
adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN);
|
||||
adev->kfd.vram_used[xcp_id] -= size;
|
||||
if (adev->gmc.is_app_apu) {
|
||||
adev->kfd.vram_used_aligned[xcp_id] -= size;
|
||||
kfd_mem_limit.system_mem_used -= size;
|
||||
kfd_mem_limit.ttm_mem_used -= size;
|
||||
} else {
|
||||
adev->kfd.vram_used_aligned[xcp_id] -=
|
||||
ALIGN(size, VRAM_AVAILABLITY_ALIGN);
|
||||
}
|
||||
}
|
||||
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
|
||||
kfd_mem_limit.system_mem_used -= size;
|
||||
|
@ -237,8 +269,8 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
|
|||
pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
|
||||
goto release;
|
||||
}
|
||||
WARN_ONCE(adev && adev->kfd.vram_used < 0,
|
||||
"KFD VRAM memory accounting unbalanced");
|
||||
WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0,
|
||||
"KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id);
|
||||
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
|
||||
"KFD TTM memory accounting unbalanced");
|
||||
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
|
||||
|
@ -254,14 +286,16 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
|
|||
u32 alloc_flags = bo->kfd_bo->alloc_flags;
|
||||
u64 size = amdgpu_bo_size(bo);
|
||||
|
||||
amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
|
||||
amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags,
|
||||
bo->xcp_id);
|
||||
|
||||
kfree(bo->kfd_bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information
|
||||
* create_dmamap_sg_bo() - Creates a amdgpu_bo object to reflect information
|
||||
* about USERPTR or DOOREBELL or MMIO BO.
|
||||
*
|
||||
* @adev: Device for which dmamap BO is being created
|
||||
* @mem: BO of peer device that is being DMA mapped. Provides parameters
|
||||
* in building the dmamap BO
|
||||
|
@ -285,7 +319,7 @@ create_dmamap_sg_bo(struct amdgpu_device *adev,
|
|||
|
||||
ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
|
||||
AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
|
||||
ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
|
||||
ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0);
|
||||
|
||||
amdgpu_bo_unreserve(mem->bo);
|
||||
|
||||
|
@ -527,6 +561,12 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
|
|||
{
|
||||
struct ttm_operation_ctx ctx = {.interruptible = true};
|
||||
struct amdgpu_bo *bo = attachment->bo_va->base.bo;
|
||||
int ret;
|
||||
|
||||
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
|
||||
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
|
||||
return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
|
@ -659,11 +699,10 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
|
|||
static void
|
||||
kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
|
||||
{
|
||||
struct ttm_operation_ctx ctx = {.interruptible = true};
|
||||
struct amdgpu_bo *bo = attachment->bo_va->base.bo;
|
||||
|
||||
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
|
||||
ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
/* This is a no-op. We don't want to trigger eviction fences when
|
||||
* unmapping DMABufs. Therefore the invalidation (moving to system
|
||||
* domain) is done in kfd_mem_dmamap_dmabuf.
|
||||
*/
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -804,7 +843,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
|
|||
* if peer device has large BAR. In contrast, access over xGMI is
|
||||
* allowed for both small and large BAR configurations of peer device
|
||||
*/
|
||||
if ((adev != bo_adev) &&
|
||||
if ((adev != bo_adev && !adev->gmc.is_app_apu) &&
|
||||
((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
|
||||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
|
||||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
|
||||
|
@ -1599,23 +1638,42 @@ out_unlock:
|
|||
return ret;
|
||||
}
|
||||
|
||||
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
|
||||
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
|
||||
uint8_t xcp_id)
|
||||
{
|
||||
uint64_t reserved_for_pt =
|
||||
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
|
||||
ssize_t available;
|
||||
uint64_t vram_available, system_mem_available, ttm_mem_available;
|
||||
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
available = adev->gmc.real_vram_size
|
||||
- adev->kfd.vram_used_aligned
|
||||
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
|
||||
- adev->kfd.vram_used_aligned[xcp_id]
|
||||
- atomic64_read(&adev->vram_pin_size)
|
||||
- reserved_for_pt;
|
||||
|
||||
if (adev->gmc.is_app_apu) {
|
||||
system_mem_available = no_system_mem_limit ?
|
||||
kfd_mem_limit.max_system_mem_limit :
|
||||
kfd_mem_limit.max_system_mem_limit -
|
||||
kfd_mem_limit.system_mem_used;
|
||||
|
||||
ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit -
|
||||
kfd_mem_limit.ttm_mem_used;
|
||||
|
||||
available = min3(system_mem_available, ttm_mem_available,
|
||||
vram_available);
|
||||
available = ALIGN_DOWN(available, PAGE_SIZE);
|
||||
} else {
|
||||
available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
|
||||
}
|
||||
|
||||
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
||||
|
||||
if (available < 0)
|
||||
available = 0;
|
||||
|
||||
return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN);
|
||||
return available;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
|
@ -1624,6 +1682,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
uint64_t *offset, uint32_t flags, bool criu_resume)
|
||||
{
|
||||
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
|
||||
struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm);
|
||||
enum ttm_bo_type bo_type = ttm_bo_type_device;
|
||||
struct sg_table *sg = NULL;
|
||||
uint64_t user_addr = 0;
|
||||
|
@ -1631,6 +1690,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
struct drm_gem_object *gobj = NULL;
|
||||
u32 domain, alloc_domain;
|
||||
uint64_t aligned_size;
|
||||
int8_t xcp_id = -1;
|
||||
u64 alloc_flags;
|
||||
int ret;
|
||||
|
||||
|
@ -1639,9 +1699,17 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
*/
|
||||
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
|
||||
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
|
||||
alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
|
||||
alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
|
||||
|
||||
if (adev->gmc.is_app_apu) {
|
||||
domain = AMDGPU_GEM_DOMAIN_GTT;
|
||||
alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
|
||||
alloc_flags = 0;
|
||||
} else {
|
||||
alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
|
||||
alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
|
||||
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
|
||||
}
|
||||
xcp_id = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id;
|
||||
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
|
||||
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
|
||||
alloc_flags = 0;
|
||||
|
@ -1693,17 +1761,19 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
|
||||
amdgpu_sync_create(&(*mem)->sync);
|
||||
|
||||
ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
|
||||
ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
|
||||
xcp_id);
|
||||
if (ret) {
|
||||
pr_debug("Insufficient memory\n");
|
||||
goto err_reserve_limit;
|
||||
}
|
||||
|
||||
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
|
||||
va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain));
|
||||
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n",
|
||||
va, (*mem)->aql_queue ? size << 1 : size,
|
||||
domain_string(alloc_domain), xcp_id);
|
||||
|
||||
ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
|
||||
bo_type, NULL, &gobj);
|
||||
bo_type, NULL, &gobj, xcp_id + 1);
|
||||
if (ret) {
|
||||
pr_debug("Failed to create BO on domain %s. ret %d\n",
|
||||
domain_string(alloc_domain), ret);
|
||||
|
@ -1728,6 +1798,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
(*mem)->domain = domain;
|
||||
(*mem)->mapped_to_gpu_memory = 0;
|
||||
(*mem)->process_info = avm->process_info;
|
||||
|
||||
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
|
||||
|
||||
if (user_addr) {
|
||||
|
@ -1759,7 +1830,7 @@ err_node_allow:
|
|||
/* Don't unreserve system mem limit twice */
|
||||
goto err_reserve_limit;
|
||||
err_bo_create:
|
||||
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
|
||||
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
|
||||
err_reserve_limit:
|
||||
mutex_destroy(&(*mem)->lock);
|
||||
if (gobj)
|
||||
|
@ -1855,11 +1926,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
|
|||
}
|
||||
|
||||
/* Update the size of the BO being freed if it was allocated from
|
||||
* VRAM and is not imported.
|
||||
* VRAM and is not imported. For APP APU VRAM allocations are done
|
||||
* in GTT domain
|
||||
*/
|
||||
if (size) {
|
||||
if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
|
||||
(!is_imported))
|
||||
if (!is_imported &&
|
||||
(mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
|
||||
(adev->gmc.is_app_apu &&
|
||||
mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
|
||||
*size = bo_size;
|
||||
else
|
||||
*size = 0;
|
||||
|
@ -2282,8 +2356,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
|
|||
(*mem)->dmabuf = dma_buf;
|
||||
(*mem)->bo = bo;
|
||||
(*mem)->va = va;
|
||||
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
|
||||
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
|
||||
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
|
||||
|
||||
(*mem)->mapped_to_gpu_memory = 0;
|
||||
(*mem)->process_info = avm->process_info;
|
||||
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
|
||||
|
@ -2445,7 +2520,9 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
|
|||
ret = -EAGAIN;
|
||||
goto unlock_out;
|
||||
}
|
||||
mem->invalid = 0;
|
||||
/* set mem valid if mem has hmm range associated */
|
||||
if (mem->range)
|
||||
mem->invalid = 0;
|
||||
}
|
||||
|
||||
unlock_out:
|
||||
|
@ -2577,8 +2654,15 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
|
|||
list_for_each_entry_safe(mem, tmp_mem,
|
||||
&process_info->userptr_inval_list,
|
||||
validate_list.head) {
|
||||
bool valid = amdgpu_ttm_tt_get_user_pages_done(
|
||||
mem->bo->tbo.ttm, mem->range);
|
||||
bool valid;
|
||||
|
||||
/* keep mem without hmm range at userptr_inval_list */
|
||||
if (!mem->range)
|
||||
continue;
|
||||
|
||||
/* Only check mem with hmm range associated */
|
||||
valid = amdgpu_ttm_tt_get_user_pages_done(
|
||||
mem->bo->tbo.ttm, mem->range);
|
||||
|
||||
mem->range = NULL;
|
||||
if (!valid) {
|
||||
|
@ -2586,7 +2670,12 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
|
|||
ret = -EAGAIN;
|
||||
continue;
|
||||
}
|
||||
WARN(mem->invalid, "Valid BO is marked invalid");
|
||||
|
||||
if (mem->invalid) {
|
||||
WARN(1, "Valid BO is marked invalid");
|
||||
ret = -EAGAIN;
|
||||
continue;
|
||||
}
|
||||
|
||||
list_move_tail(&mem->validate_list.head,
|
||||
&process_info->userptr_valid_list);
|
||||
|
|
|
@ -272,6 +272,7 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
|
|||
break;
|
||||
case ATOM_DGPU_VRAM_TYPE_HBM2:
|
||||
case ATOM_DGPU_VRAM_TYPE_HBM2E:
|
||||
case ATOM_DGPU_VRAM_TYPE_HBM3:
|
||||
vram_type = AMDGPU_VRAM_TYPE_HBM;
|
||||
break;
|
||||
case ATOM_DGPU_VRAM_TYPE_GDDR6:
|
||||
|
|
|
@@ -104,9 +104,8 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
|
|||
adev->bios = NULL;
|
||||
vram_base = pci_resource_start(adev->pdev, 0);
|
||||
bios = ioremap_wc(vram_base, size);
|
||||
if (!bios) {
|
||||
if (!bios)
|
||||
return false;
|
||||
}
|
||||
|
||||
adev->bios = kmalloc(size, GFP_KERNEL);
|
||||
if (!adev->bios) {
|
||||
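The hunk above only drops redundant braces in igp_read_bios_from_vram(); the pattern it relies on is mapping the VRAM BAR write-combined and copying the shadowed image into a kernel buffer. A condensed sketch of that pattern follows (illustrative only: the helper name and the simplified error handling are assumptions, not code from this series).

#include <linux/io.h>
#include <linux/pci.h>
#include <linux/slab.h>

/* Map BAR0 write-combined, copy 'size' bytes out of it, unmap again. */
static void *copy_image_from_bar0(struct pci_dev *pdev, size_t size)
{
	resource_size_t base = pci_resource_start(pdev, 0);
	void __iomem *win;
	void *buf;

	if (!base || pci_resource_len(pdev, 0) < size)
		return NULL;

	win = ioremap_wc(base, size);
	if (!win)
		return NULL;

	buf = kmalloc(size, GFP_KERNEL);
	if (buf)
		memcpy_fromio(buf, win, size);	/* copy out of MMIO space */

	iounmap(win);
	return buf;
}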
|
@@ -133,9 +132,8 @@ bool amdgpu_read_bios(struct amdgpu_device *adev)
|
|||
adev->bios = NULL;
|
||||
/* XXX: some cards may return 0 for rom size? ddx has a workaround */
|
||||
bios = pci_map_rom(adev->pdev, &size);
|
||||
if (!bios) {
|
||||
if (!bios)
|
||||
return false;
|
||||
}
|
||||
|
||||
adev->bios = kzalloc(size, GFP_KERNEL);
|
||||
if (adev->bios == NULL) {
|
||||
|
@ -168,9 +166,9 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
|
|||
header[AMD_VBIOS_SIGNATURE_END] = 0;
|
||||
|
||||
if ((!AMD_IS_VALID_VBIOS(header)) ||
|
||||
0 != memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET],
|
||||
AMD_VBIOS_SIGNATURE,
|
||||
strlen(AMD_VBIOS_SIGNATURE)))
|
||||
memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET],
|
||||
AMD_VBIOS_SIGNATURE,
|
||||
strlen(AMD_VBIOS_SIGNATURE)) != 0)
|
||||
return false;
|
||||
|
||||
/* valid vbios, go on */
|
||||
|
@ -264,7 +262,7 @@ static int amdgpu_atrm_call(acpi_handle atrm_handle, uint8_t *bios,
|
|||
|
||||
status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer);
|
||||
if (ACPI_FAILURE(status)) {
|
||||
printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
|
||||
DRM_ERROR("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
|
@ -363,7 +361,7 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
|
|||
struct acpi_table_header *hdr;
|
||||
acpi_size tbl_size;
|
||||
UEFI_ACPI_VFCT *vfct;
|
||||
unsigned offset;
|
||||
unsigned int offset;
|
||||
|
||||
if (!ACPI_SUCCESS(acpi_get_table("VFCT", 1, &hdr)))
|
||||
return false;
|
||||
|
|
|
@ -593,11 +593,20 @@ static int amdgpu_connector_set_property(struct drm_connector *connector,
|
|||
|
||||
switch (val) {
|
||||
default:
|
||||
case DRM_MODE_SCALE_NONE: rmx_type = RMX_OFF; break;
|
||||
case DRM_MODE_SCALE_CENTER: rmx_type = RMX_CENTER; break;
|
||||
case DRM_MODE_SCALE_ASPECT: rmx_type = RMX_ASPECT; break;
|
||||
case DRM_MODE_SCALE_FULLSCREEN: rmx_type = RMX_FULL; break;
|
||||
case DRM_MODE_SCALE_NONE:
|
||||
rmx_type = RMX_OFF;
|
||||
break;
|
||||
case DRM_MODE_SCALE_CENTER:
|
||||
rmx_type = RMX_CENTER;
|
||||
break;
|
||||
case DRM_MODE_SCALE_ASPECT:
|
||||
rmx_type = RMX_ASPECT;
|
||||
break;
|
||||
case DRM_MODE_SCALE_FULLSCREEN:
|
||||
rmx_type = RMX_FULL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (amdgpu_encoder->rmx_type == rmx_type)
|
||||
return 0;
|
||||
|
||||
|
@ -799,12 +808,21 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector,
|
|||
}
|
||||
|
||||
switch (value) {
|
||||
case DRM_MODE_SCALE_NONE: rmx_type = RMX_OFF; break;
|
||||
case DRM_MODE_SCALE_CENTER: rmx_type = RMX_CENTER; break;
|
||||
case DRM_MODE_SCALE_ASPECT: rmx_type = RMX_ASPECT; break;
|
||||
case DRM_MODE_SCALE_NONE:
|
||||
rmx_type = RMX_OFF;
|
||||
break;
|
||||
case DRM_MODE_SCALE_CENTER:
|
||||
rmx_type = RMX_CENTER;
|
||||
break;
|
||||
case DRM_MODE_SCALE_ASPECT:
|
||||
rmx_type = RMX_ASPECT;
|
||||
break;
|
||||
default:
|
||||
case DRM_MODE_SCALE_FULLSCREEN: rmx_type = RMX_FULL; break;
|
||||
case DRM_MODE_SCALE_FULLSCREEN:
|
||||
rmx_type = RMX_FULL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (amdgpu_encoder->rmx_type == rmx_type)
|
||||
return 0;
|
||||
|
||||
|
@ -1127,7 +1145,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
|
|||
/* assume digital unless load detected otherwise */
|
||||
amdgpu_connector->use_digital = true;
|
||||
lret = encoder_funcs->detect(encoder, connector);
|
||||
DRM_DEBUG_KMS("load_detect %x returned: %x\n",encoder->encoder_type,lret);
|
||||
DRM_DEBUG_KMS("load_detect %x returned: %x\n",
|
||||
encoder->encoder_type, lret);
|
||||
if (lret == connector_status_connected)
|
||||
amdgpu_connector->use_digital = false;
|
||||
}
|
||||
|
@ -1991,7 +2010,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
|
|||
if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) {
|
||||
if (i2c_bus->valid) {
|
||||
connector->polled = DRM_CONNECTOR_POLL_CONNECT |
|
||||
DRM_CONNECTOR_POLL_DISCONNECT;
|
||||
DRM_CONNECTOR_POLL_DISCONNECT;
|
||||
}
|
||||
} else
|
||||
connector->polled = DRM_CONNECTOR_POLL_HPD;
|
||||
|
|
|
@ -112,6 +112,9 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
|
|||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
|
||||
return -EINVAL;
|
||||
|
||||
++(num_ibs[r]);
|
||||
p->gang_leader_idx = r;
|
||||
return 0;
|
||||
|
@ -192,7 +195,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
|
|||
uint64_t *chunk_array_user;
|
||||
uint64_t *chunk_array;
|
||||
uint32_t uf_offset = 0;
|
||||
unsigned int size;
|
||||
size_t size;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
|
@ -285,6 +288,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
|
|||
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
|
||||
case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -393,7 +397,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
|
|||
{
|
||||
struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
|
||||
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
|
||||
unsigned num_deps;
|
||||
unsigned int num_deps;
|
||||
int i, r;
|
||||
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
|
@ -464,7 +468,7 @@ static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
|
|||
struct amdgpu_cs_chunk *chunk)
|
||||
{
|
||||
struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
|
||||
unsigned num_deps;
|
||||
unsigned int num_deps;
|
||||
int i, r;
|
||||
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
|
@ -482,7 +486,7 @@ static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
|
|||
struct amdgpu_cs_chunk *chunk)
|
||||
{
|
||||
struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
|
||||
unsigned num_deps;
|
||||
unsigned int num_deps;
|
||||
int i, r;
|
||||
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
|
@ -502,7 +506,7 @@ static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
|
|||
struct amdgpu_cs_chunk *chunk)
|
||||
{
|
||||
struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
|
||||
unsigned num_deps;
|
||||
unsigned int num_deps;
|
||||
int i;
|
||||
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
|
@ -536,7 +540,7 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
|
|||
struct amdgpu_cs_chunk *chunk)
|
||||
{
|
||||
struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
|
||||
unsigned num_deps;
|
||||
unsigned int num_deps;
|
||||
int i;
|
||||
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
|
@@ -575,6 +579,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
|
|||
return 0;
|
||||
}

static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
			       struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
	int i;

	if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
		return -EINVAL;

	for (i = 0; i < p->gang_size; ++i) {
		p->jobs[i]->shadow_va = shadow->shadow_va;
		p->jobs[i]->csa_va = shadow->csa_va;
		p->jobs[i]->gds_va = shadow->gds_va;
		p->jobs[i]->init_shadow =
			shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
	}

	return 0;
}
|
||||
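amdgpu_cs_p2_shadow() above consumes an AMDGPU_CHUNK_ID_CP_GFX_SHADOW chunk supplied by userspace. A rough sketch of how a UMD could populate that chunk is shown below; the helper, the include path and the VA values are illustrative assumptions, only the chunk id, flag and field names come from the patch.

#include <stdint.h>
#include <string.h>
#include "amdgpu_drm.h"	/* uapi header; exact path depends on the build */

static void fill_shadow_chunk(struct drm_amdgpu_cs_chunk *chunk,
			      struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow,
			      uint64_t shadow_va, uint64_t csa_va,
			      uint64_t gds_va)
{
	memset(shadow, 0, sizeof(*shadow));
	shadow->shadow_va = shadow_va;	/* preamble shadow buffer */
	shadow->csa_va = csa_va;	/* context save area */
	shadow->gds_va = gds_va;	/* GDS backup */
	shadow->flags = AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;

	chunk->chunk_id = AMDGPU_CHUNK_ID_CP_GFX_SHADOW;
	chunk->length_dw = sizeof(*shadow) / 4;	/* dwords, per CS ABI */
	chunk->chunk_data = (uintptr_t)shadow;
}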
|
||||
static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
|
||||
{
|
||||
unsigned int ce_preempt = 0, de_preempt = 0;
|
||||
|
@ -617,6 +641,11 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
|
|||
if (r)
|
||||
return r;
|
||||
break;
|
||||
case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
|
||||
r = amdgpu_cs_p2_shadow(p, chunk);
|
||||
if (r)
|
||||
return r;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -729,6 +758,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
|
|||
|
||||
if (used_vis_vram < total_vis_vram) {
|
||||
u64 free_vis_vram = total_vis_vram - used_vis_vram;
|
||||
|
||||
adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
|
||||
increment_us, us_upper_bound);
|
||||
|
||||
|
@ -1047,9 +1077,8 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
|
|||
|
||||
/* the IB should be reserved at this point */
|
||||
r = amdgpu_bo_kmap(aobj, (void **)&kptr);
|
||||
if (r) {
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);
|
||||
|
||||
|
@ -1356,7 +1385,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
|||
/* Cleanup the parser structure */
|
||||
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned int i;
|
||||
|
||||
amdgpu_sync_free(&parser->sync);
|
||||
for (i = 0; i < parser->num_post_deps; i++) {
|
||||
|
|
|
@ -106,3 +106,41 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
|
||||
uint64_t csa_addr)
|
||||
{
|
||||
struct ww_acquire_ctx ticket;
|
||||
struct list_head list;
|
||||
struct amdgpu_bo_list_entry pd;
|
||||
struct ttm_validate_buffer csa_tv;
|
||||
int r;
|
||||
|
||||
INIT_LIST_HEAD(&list);
|
||||
INIT_LIST_HEAD(&csa_tv.head);
|
||||
csa_tv.bo = &bo->tbo;
|
||||
csa_tv.num_shared = 1;
|
||||
|
||||
list_add(&csa_tv.head, &list);
|
||||
amdgpu_vm_get_pd_bo(vm, &list, &pd);
|
||||
|
||||
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r);
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_vm_bo_del(adev, bo_va);
|
||||
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -34,6 +34,9 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo
|
|||
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
|
||||
uint64_t csa_addr, uint32_t size);
|
||||
int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
|
||||
uint64_t csa_addr);
|
||||
void amdgpu_free_static_csa(struct amdgpu_bo **bo);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -222,8 +222,19 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
|
|||
drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
|
||||
|
||||
hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
|
||||
scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
|
||||
num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
|
||||
|
||||
if (!(adev)->xcp_mgr) {
|
||||
scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
|
||||
num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
|
||||
} else {
|
||||
struct amdgpu_fpriv *fpriv;
|
||||
|
||||
fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
|
||||
r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
|
||||
&num_scheds, &scheds);
|
||||
if (r)
|
||||
goto cleanup_entity;
|
||||
}
|
||||
|
||||
/* disable load balance if the hw engine retains context among dependent jobs */
|
||||
if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
|
||||
|
@ -255,7 +266,8 @@ error_free_entity:
|
|||
return r;
|
||||
}
|
||||
|
||||
static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
|
||||
static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
|
||||
struct amdgpu_ctx_entity *entity)
|
||||
{
|
||||
ktime_t res = ns_to_ktime(0);
|
||||
int i;
|
||||
|
@ -268,6 +280,8 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
|
|||
dma_fence_put(entity->fences[i]);
|
||||
}
|
||||
|
||||
amdgpu_xcp_release_sched(adev, entity);
|
||||
|
||||
kfree(entity);
|
||||
return res;
|
||||
}
|
||||
|
@ -303,6 +317,7 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
|
|||
static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
|
||||
struct drm_file *filp, struct amdgpu_ctx *ctx)
|
||||
{
|
||||
struct amdgpu_fpriv *fpriv = filp->driver_priv;
|
||||
u32 current_stable_pstate;
|
||||
int r;
|
||||
|
||||
|
@ -331,6 +346,7 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
|
|||
else
|
||||
ctx->stable_pstate = current_stable_pstate;
|
||||
|
||||
ctx->ctx_mgr = &(fpriv->ctx_mgr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -399,7 +415,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
|
|||
for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
|
||||
ktime_t spend;
|
||||
|
||||
spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
|
||||
spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
|
||||
atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
|
||||
}
|
||||
}
|
||||
|
@ -576,6 +592,9 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
|
|||
if (atomic_read(&ctx->guilty))
|
||||
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
|
||||
|
||||
if (amdgpu_in_reset(adev))
|
||||
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;
|
||||
|
||||
if (adev->ras_enabled && con) {
|
||||
/* Return the cached values in O(1),
|
||||
* and schedule delayed work to cache
|
||||
|
|
|
@ -57,6 +57,7 @@ struct amdgpu_ctx {
|
|||
unsigned long ras_counter_ce;
|
||||
unsigned long ras_counter_ue;
|
||||
uint32_t stable_pstate;
|
||||
struct amdgpu_ctx_mgr *ctx_mgr;
|
||||
};
|
||||
|
||||
struct amdgpu_ctx_mgr {
|
||||
|
|
|
@@ -56,14 +56,14 @@
|
|||
*
|
||||
* Bit 62: Indicates a GRBM bank switch is needed
|
||||
* Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is
|
||||
* zero)
|
||||
* zero)
|
||||
* Bits 24..33: The SE or ME selector if needed
|
||||
* Bits 34..43: The SH (or SA) or PIPE selector if needed
|
||||
* Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
|
||||
*
|
||||
* Bit 23: Indicates that the PM power gating lock should be held
|
||||
* This is necessary to read registers that might be
|
||||
* unreliable during a power gating transistion.
|
||||
* This is necessary to read registers that might be
|
||||
* unreliable during a power gating transistion.
|
||||
*
|
||||
* The lower bits are the BYTE offset of the register to read. This
|
||||
* allows reading multiple registers in a single call and having
|
||||
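Putting the bit layout documented above together, a reader of the amdgpu_regs file would encode its pread() offset roughly as follows. This is a sketch under the stated layout; the helper name and the 23-bit byte-offset mask are assumptions.

#include <stdint.h>

static uint64_t regs_offset(uint32_t byte_off, uint32_t se, uint32_t sh,
			    uint32_t instance, int grbm_bank, int pg_lock)
{
	uint64_t off = byte_off & 0x7FFFFF;		/* bits 0..22: byte offset */

	off |= (uint64_t)(se & 0x3FF) << 24;		/* bits 24..33: SE or ME */
	off |= (uint64_t)(sh & 0x3FF) << 34;		/* bits 34..43: SH/SA or PIPE */
	off |= (uint64_t)(instance & 0x3FF) << 44;	/* bits 44..53: INSTANCE/CU or QUEUE */
	if (pg_lock)
		off |= 1ULL << 23;			/* bit 23: hold the PG lock */
	if (grbm_bank)
		off |= 1ULL << 62;			/* bit 62: GRBM bank switch */
	return off;
}

A banked register read is then a plain pread(fd, &val, 4, regs_offset(...)) on the debugfs file.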
|
@@ -76,7 +76,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
|
|||
ssize_t result = 0;
|
||||
int r;
|
||||
bool pm_pg_lock, use_bank, use_ring;
|
||||
unsigned instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;
|
||||
unsigned int instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;
|
||||
|
||||
pm_pg_lock = use_bank = use_ring = false;
|
||||
instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0;
|
||||
|
@ -136,10 +136,10 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
|
|||
}
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
amdgpu_gfx_select_se_sh(adev, se_bank,
|
||||
sh_bank, instance_bank);
|
||||
sh_bank, instance_bank, 0);
|
||||
} else if (use_ring) {
|
||||
mutex_lock(&adev->srbm_mutex);
|
||||
amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid);
|
||||
amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid, 0);
|
||||
}
|
||||
|
||||
if (pm_pg_lock)
|
||||
|
@ -169,10 +169,10 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
|
|||
|
||||
end:
|
||||
if (use_bank) {
|
||||
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
|
||||
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
} else if (use_ring) {
|
||||
amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
|
||||
amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
|
||||
mutex_unlock(&adev->srbm_mutex);
|
||||
}
|
||||
|
||||
|
@ -208,7 +208,7 @@ static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file)
|
|||
{
|
||||
struct amdgpu_debugfs_regs2_data *rd;
|
||||
|
||||
rd = kzalloc(sizeof *rd, GFP_KERNEL);
|
||||
rd = kzalloc(sizeof(*rd), GFP_KERNEL);
|
||||
if (!rd)
|
||||
return -ENOMEM;
|
||||
rd->adev = file_inode(file)->i_private;
|
||||
|
@ -221,6 +221,7 @@ static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file)
|
|||
static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct amdgpu_debugfs_regs2_data *rd = file->private_data;
|
||||
|
||||
mutex_destroy(&rd->lock);
|
||||
kfree(file->private_data);
|
||||
return 0;
|
||||
|
@ -262,14 +263,14 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
|
|||
}
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se,
|
||||
rd->id.grbm.sh,
|
||||
rd->id.grbm.instance);
|
||||
rd->id.grbm.sh,
|
||||
rd->id.grbm.instance, rd->id.xcc_id);
|
||||
}
|
||||
|
||||
if (rd->id.use_srbm) {
|
||||
mutex_lock(&adev->srbm_mutex);
|
||||
amdgpu_gfx_select_me_pipe_q(adev, rd->id.srbm.me, rd->id.srbm.pipe,
|
||||
rd->id.srbm.queue, rd->id.srbm.vmid);
|
||||
rd->id.srbm.queue, rd->id.srbm.vmid, rd->id.xcc_id);
|
||||
}
|
||||
|
||||
if (rd->id.pg_lock)
|
||||
|
@ -295,12 +296,12 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
|
|||
}
|
||||
end:
|
||||
if (rd->id.use_grbm) {
|
||||
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
|
||||
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, rd->id.xcc_id);
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
}
|
||||
|
||||
if (rd->id.use_srbm) {
|
||||
amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
|
||||
amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, rd->id.xcc_id);
|
||||
mutex_unlock(&adev->srbm_mutex);
|
||||
}
|
||||
|
||||
|
@ -319,18 +320,45 @@ end:
|
|||
static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data)
|
||||
{
|
||||
struct amdgpu_debugfs_regs2_data *rd = f->private_data;
|
||||
struct amdgpu_debugfs_regs2_iocdata v1_data;
|
||||
int r;
|
||||
|
||||
mutex_lock(&rd->lock);
|
||||
|
||||
switch (cmd) {
|
||||
case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2:
|
||||
r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata_v2 *)data,
|
||||
sizeof(rd->id));
|
||||
if (r)
|
||||
r = -EINVAL;
|
||||
goto done;
|
||||
case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE:
|
||||
mutex_lock(&rd->lock);
|
||||
r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata *)data, sizeof rd->id);
|
||||
mutex_unlock(&rd->lock);
|
||||
return r ? -EINVAL : 0;
|
||||
r = copy_from_user(&v1_data, (struct amdgpu_debugfs_regs2_iocdata *)data,
|
||||
sizeof(v1_data));
|
||||
if (r) {
|
||||
r = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
goto v1_copy;
|
||||
default:
|
||||
return -EINVAL;
|
||||
r = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
return 0;
|
||||
|
||||
v1_copy:
|
||||
rd->id.use_srbm = v1_data.use_srbm;
|
||||
rd->id.use_grbm = v1_data.use_grbm;
|
||||
rd->id.pg_lock = v1_data.pg_lock;
|
||||
rd->id.grbm.se = v1_data.grbm.se;
|
||||
rd->id.grbm.sh = v1_data.grbm.sh;
|
||||
rd->id.grbm.instance = v1_data.grbm.instance;
|
||||
rd->id.srbm.me = v1_data.srbm.me;
|
||||
rd->id.srbm.pipe = v1_data.srbm.pipe;
|
||||
rd->id.srbm.queue = v1_data.srbm.queue;
|
||||
rd->id.xcc_id = 0;
|
||||
done:
|
||||
mutex_unlock(&rd->lock);
|
||||
return r;
|
||||
}
|
||||
|
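The V2 ioctl handling above widens the older payload and zeroes the new xcc_id field. Stripped of driver specifics, the versioning pattern looks like this (struct and field names are illustrative, not the driver's uapi):

#include <linux/types.h>

struct state_v1 { __u32 use_grbm, se, sh, instance; };
struct state_v2 { __u32 use_grbm, se, sh, instance, xcc_id; };

static void state_v1_to_v2(struct state_v2 *dst, const struct state_v1 *src)
{
	dst->use_grbm = src->use_grbm;
	dst->se = src->se;
	dst->sh = src->sh;
	dst->instance = src->instance;
	dst->xcc_id = 0;	/* new in v2; older callers get the first XCC */
}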
||||
static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
|
||||
|
@@ -343,6 +371,136 @@ static ssize_t amdgpu_debugfs_regs2_write(struct file *f, const char __user *buf
|
|||
return amdgpu_debugfs_regs2_op(f, (char __user *)buf, *pos, size, 1);
|
||||
}
|
||||
|
||||
static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct amdgpu_debugfs_gprwave_data *rd;
|
||||
|
||||
rd = kzalloc(sizeof *rd, GFP_KERNEL);
|
||||
if (!rd)
|
||||
return -ENOMEM;
|
||||
rd->adev = file_inode(file)->i_private;
|
||||
file->private_data = rd;
|
||||
mutex_init(&rd->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_debugfs_gprwave_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct amdgpu_debugfs_gprwave_data *rd = file->private_data;
|
||||
mutex_destroy(&rd->lock);
|
||||
kfree(file->private_data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
|
||||
{
|
||||
struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
|
||||
struct amdgpu_device *adev = rd->adev;
|
||||
ssize_t result = 0;
|
||||
int r;
|
||||
uint32_t *data, x;
|
||||
|
||||
if (size & 0x3 || *pos & 0x3)
|
||||
return -EINVAL;
|
||||
|
||||
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
|
||||
if (r < 0) {
|
||||
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_virt_enable_access_debugfs(adev);
|
||||
if (r < 0) {
|
||||
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
|
||||
return r;
|
||||
}
|
||||
|
||||
data = kcalloc(1024, sizeof(*data), GFP_KERNEL);
|
||||
if (!data) {
|
||||
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
|
||||
amdgpu_virt_disable_access_debugfs(adev);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* switch to the specific se/sh/cu */
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
amdgpu_gfx_select_se_sh(adev, rd->id.se, rd->id.sh, rd->id.cu, rd->id.xcc_id);
|
||||
|
||||
if (!rd->id.gpr_or_wave) {
|
||||
x = 0;
|
||||
if (adev->gfx.funcs->read_wave_data)
|
||||
adev->gfx.funcs->read_wave_data(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, data, &x);
|
||||
} else {
|
||||
x = size >> 2;
|
||||
if (rd->id.gpr.vpgr_or_sgpr) {
|
||||
if (adev->gfx.funcs->read_wave_vgprs)
|
||||
adev->gfx.funcs->read_wave_vgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, rd->id.gpr.thread, *pos, size>>2, data);
|
||||
} else {
|
||||
if (adev->gfx.funcs->read_wave_sgprs)
|
||||
adev->gfx.funcs->read_wave_sgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, *pos, size>>2, data);
|
||||
}
|
||||
}
|
||||
|
||||
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id);
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
|
||||
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
|
||||
|
||||
if (!x) {
|
||||
result = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
while (size && (*pos < x * 4)) {
|
||||
uint32_t value;
|
||||
|
||||
value = data[*pos >> 2];
|
||||
r = put_user(value, (uint32_t *)buf);
|
||||
if (r) {
|
||||
result = r;
|
||||
goto done;
|
||||
}
|
||||
|
||||
result += 4;
|
||||
buf += 4;
|
||||
*pos += 4;
|
||||
size -= 4;
|
||||
}
|
||||
|
||||
done:
|
||||
amdgpu_virt_disable_access_debugfs(adev);
|
||||
kfree(data);
|
||||
return result;
|
||||
}
|
||||
|
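The read path above snapshots the wave/GPR data into a kernel buffer and then hands it back one dword at a time. A minimal sketch of that copy-out pattern, with a stand-in buffer instead of the real snapshot:

#include <linux/fs.h>
#include <linux/types.h>
#include <linux/uaccess.h>

static ssize_t copy_dwords_to_user(const u32 *snap, u32 ndw,
				   char __user *buf, size_t size, loff_t *pos)
{
	ssize_t written = 0;

	if ((size & 0x3) || (*pos & 0x3))	/* dword-aligned access only */
		return -EINVAL;

	while (size && *pos < (loff_t)ndw * 4) {
		if (put_user(snap[*pos >> 2], (u32 __user *)buf))
			return written ? written : -EFAULT;
		written += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}
	return written;
}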
||||
static long amdgpu_debugfs_gprwave_ioctl(struct file *f, unsigned int cmd, unsigned long data)
|
||||
{
|
||||
struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
|
||||
int r = 0;
|
||||
|
||||
mutex_lock(&rd->lock);
|
||||
|
||||
switch (cmd) {
|
||||
case AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE:
|
||||
if (copy_from_user(&rd->id,
|
||||
(struct amdgpu_debugfs_gprwave_iocdata *)data,
|
||||
sizeof(rd->id)))
|
||||
r = -EFAULT;
|
||||
goto done;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
done:
|
||||
mutex_unlock(&rd->lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
|
||||
|
@ -863,7 +1021,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
|
|||
* The offset being sought changes which wave that the status data
|
||||
* will be returned for. The bits are used as follows:
|
||||
*
|
||||
* Bits 0..6: Byte offset into data
|
||||
* Bits 0..6: Byte offset into data
|
||||
* Bits 7..14: SE selector
|
||||
* Bits 15..22: SH/SA selector
|
||||
* Bits 23..30: CU/{WGP+SIMD} selector
|
||||
|
@ -907,13 +1065,13 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
|
|||
|
||||
/* switch to the specific se/sh/cu */
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
amdgpu_gfx_select_se_sh(adev, se, sh, cu);
|
||||
amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
|
||||
|
||||
x = 0;
|
||||
if (adev->gfx.funcs->read_wave_data)
|
||||
adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x);
|
||||
adev->gfx.funcs->read_wave_data(adev, 0, simd, wave, data, &x);
|
||||
|
||||
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
|
||||
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
|
||||
|
@ -1001,17 +1159,17 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
|
|||
|
||||
/* switch to the specific se/sh/cu */
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
amdgpu_gfx_select_se_sh(adev, se, sh, cu);
|
||||
amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
|
||||
|
||||
if (bank == 0) {
|
||||
if (adev->gfx.funcs->read_wave_vgprs)
|
||||
adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data);
|
||||
adev->gfx.funcs->read_wave_vgprs(adev, 0, simd, wave, thread, offset, size>>2, data);
|
||||
} else {
|
||||
if (adev->gfx.funcs->read_wave_sgprs)
|
||||
adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data);
|
||||
adev->gfx.funcs->read_wave_sgprs(adev, 0, simd, wave, offset, size>>2, data);
|
||||
}
|
||||
|
||||
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
|
||||
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
|
||||
|
@ -1339,6 +1497,15 @@ static const struct file_operations amdgpu_debugfs_regs2_fops = {
|
|||
.llseek = default_llseek
|
||||
};
|
||||
|
||||
static const struct file_operations amdgpu_debugfs_gprwave_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.unlocked_ioctl = amdgpu_debugfs_gprwave_ioctl,
|
||||
.read = amdgpu_debugfs_gprwave_read,
|
||||
.open = amdgpu_debugfs_gprwave_open,
|
||||
.release = amdgpu_debugfs_gprwave_release,
|
||||
.llseek = default_llseek
|
||||
};
|
||||
|
||||
static const struct file_operations amdgpu_debugfs_regs_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.read = amdgpu_debugfs_regs_read,
|
||||
|
@ -1416,6 +1583,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = {
|
|||
static const struct file_operations *debugfs_regs[] = {
|
||||
&amdgpu_debugfs_regs_fops,
|
||||
&amdgpu_debugfs_regs2_fops,
|
||||
&amdgpu_debugfs_gprwave_fops,
|
||||
&amdgpu_debugfs_regs_didt_fops,
|
||||
&amdgpu_debugfs_regs_pcie_fops,
|
||||
&amdgpu_debugfs_regs_smc_fops,
|
||||
|
@ -1429,9 +1597,10 @@ static const struct file_operations *debugfs_regs[] = {
|
|||
&amdgpu_debugfs_gfxoff_residency_fops,
|
||||
};
|
||||
|
||||
static const char *debugfs_regs_names[] = {
|
||||
static const char * const debugfs_regs_names[] = {
|
||||
"amdgpu_regs",
|
||||
"amdgpu_regs2",
|
||||
"amdgpu_gprwave",
|
||||
"amdgpu_regs_didt",
|
||||
"amdgpu_regs_pcie",
|
||||
"amdgpu_regs_smc",
|
||||
|
@ -1447,7 +1616,7 @@ static const char *debugfs_regs_names[] = {
|
|||
|
||||
/**
|
||||
* amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
|
||||
* register access.
|
||||
* register access.
|
||||
*
|
||||
* @adev: The device to attach the debugfs entries to
|
||||
*/
|
||||
|
@ -1459,7 +1628,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
|
|||
|
||||
for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
|
||||
ent = debugfs_create_file(debugfs_regs_names[i],
|
||||
S_IFREG | S_IRUGO, root,
|
||||
S_IFREG | 0444, root,
|
||||
adev, debugfs_regs[i]);
|
||||
if (!i && !IS_ERR_OR_NULL(ent))
|
||||
i_size_write(ent->d_inode, adev->rmmio_size);
|
||||
|
@ -1470,7 +1639,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
|
|||
|
||||
static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
|
||||
struct amdgpu_device *adev = m->private;
|
||||
struct drm_device *dev = adev_to_drm(adev);
|
||||
int r = 0, i;
|
||||
|
||||
|
@ -1494,12 +1663,12 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
|
|||
kthread_park(ring->sched.thread);
|
||||
}
|
||||
|
||||
seq_printf(m, "run ib test:\n");
|
||||
seq_puts(m, "run ib test:\n");
|
||||
r = amdgpu_ib_ring_tests(adev);
|
||||
if (r)
|
||||
seq_printf(m, "ib ring tests failed (%d).\n", r);
|
||||
else
|
||||
seq_printf(m, "ib ring tests passed.\n");
|
||||
seq_puts(m, "ib ring tests passed.\n");
|
||||
|
||||
/* go on the scheduler */
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
|
||||
|
@ -1581,7 +1750,7 @@ static int amdgpu_debugfs_benchmark(void *data, u64 val)
|
|||
|
||||
static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
|
||||
struct amdgpu_device *adev = m->private;
|
||||
struct drm_device *dev = adev_to_drm(adev);
|
||||
struct drm_file *file;
|
||||
int r;
|
||||
|
@ -1978,7 +2147,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
|
|||
amdgpu_debugfs_ring_init(adev, ring);
|
||||
}
|
||||
|
||||
for ( i = 0; i < adev->vcn.num_vcn_inst; i++) {
|
||||
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
|
||||
if (!amdgpu_vcnfw_log)
|
||||
break;
|
||||
|
||||
|
|
|
@ -707,6 +707,48 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
|
|||
return r;
|
||||
}
|
||||
|
||||
u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
|
||||
u64 reg_addr)
|
||||
{
|
||||
unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
|
||||
u32 r;
|
||||
void __iomem *pcie_index_offset;
|
||||
void __iomem *pcie_index_hi_offset;
|
||||
void __iomem *pcie_data_offset;
|
||||
|
||||
pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
|
||||
pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
|
||||
if (adev->nbio.funcs->get_pcie_index_hi_offset)
|
||||
pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
|
||||
else
|
||||
pcie_index_hi = 0;
|
||||
|
||||
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
|
||||
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
|
||||
pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
|
||||
if (pcie_index_hi != 0)
|
||||
pcie_index_hi_offset = (void __iomem *)adev->rmmio +
|
||||
pcie_index_hi * 4;
|
||||
|
||||
writel(reg_addr, pcie_index_offset);
|
||||
readl(pcie_index_offset);
|
||||
if (pcie_index_hi != 0) {
|
||||
writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
|
||||
readl(pcie_index_hi_offset);
|
||||
}
|
||||
r = readl(pcie_data_offset);
|
||||
|
||||
/* clear the high bits */
|
||||
if (pcie_index_hi != 0) {
|
||||
writel(0, pcie_index_hi_offset);
|
||||
readl(pcie_index_hi_offset);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
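amdgpu_device_indirect_rreg_ext() above is the classic index/data window access, extended with a second index register for the high address bits. Condensed into a sketch (window offsets are placeholders and locking is omitted):

#include <linux/io.h>
#include <linux/kernel.h>

static u32 indirect_read(void __iomem *mmio, u32 index_off, u32 index_hi_off,
			 u32 data_off, u64 reg_addr)
{
	u32 val;

	writel(lower_32_bits(reg_addr), mmio + index_off);
	readl(mmio + index_off);			/* flush the posted write */
	if (index_hi_off) {
		writel(upper_32_bits(reg_addr) & 0xff, mmio + index_hi_off);
		readl(mmio + index_hi_off);
	}
	val = readl(mmio + data_off);			/* windowed register value */
	if (index_hi_off) {
		writel(0, mmio + index_hi_off);		/* clear the high bits again */
		readl(mmio + index_hi_off);
	}
	return val;
}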
||||
/**
|
||||
* amdgpu_device_indirect_rreg64 - read a 64bits indirect register
|
||||
*
|
||||
|
@ -747,8 +789,6 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
|
|||
* amdgpu_device_indirect_wreg - write an indirect register address
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @pcie_index: mmio register offset
|
||||
* @pcie_data: mmio register offset
|
||||
* @reg_addr: indirect register offset
|
||||
* @reg_data: indirect register data
|
||||
*
|
||||
|
@ -774,12 +814,50 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
|
|||
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
|
||||
}
|
||||
|
||||
void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
|
||||
u64 reg_addr, u32 reg_data)
|
||||
{
|
||||
unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
|
||||
void __iomem *pcie_index_offset;
|
||||
void __iomem *pcie_index_hi_offset;
|
||||
void __iomem *pcie_data_offset;
|
||||
|
||||
pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
|
||||
pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
|
||||
if (adev->nbio.funcs->get_pcie_index_hi_offset)
|
||||
pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
|
||||
else
|
||||
pcie_index_hi = 0;
|
||||
|
||||
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
|
||||
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
|
||||
pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
|
||||
if (pcie_index_hi != 0)
|
||||
pcie_index_hi_offset = (void __iomem *)adev->rmmio +
|
||||
pcie_index_hi * 4;
|
||||
|
||||
writel(reg_addr, pcie_index_offset);
|
||||
readl(pcie_index_offset);
|
||||
if (pcie_index_hi != 0) {
|
||||
writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
|
||||
readl(pcie_index_hi_offset);
|
||||
}
|
||||
writel(reg_data, pcie_data_offset);
|
||||
readl(pcie_data_offset);
|
||||
|
||||
/* clear the high bits */
|
||||
if (pcie_index_hi != 0) {
|
||||
writel(0, pcie_index_hi_offset);
|
||||
readl(pcie_index_hi_offset);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @pcie_index: mmio register offset
|
||||
* @pcie_data: mmio register offset
|
||||
* @reg_addr: indirect register offset
|
||||
* @reg_data: indirect register data
|
||||
*
|
||||
|
@ -840,6 +918,13 @@ static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
|
||||
{
|
||||
DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
|
||||
BUG();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_invalid_wreg - dummy reg write function
|
||||
*
|
||||
|
@ -857,6 +942,13 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32
|
|||
BUG();
|
||||
}
|
||||
|
||||
static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
|
||||
{
|
||||
DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
|
||||
reg, v);
|
||||
BUG();
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_invalid_rreg64 - dummy 64 bit reg read function
|
||||
*
|
||||
|
@ -942,7 +1034,8 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
|
|||
{
|
||||
amdgpu_asic_pre_asic_init(adev);
|
||||
|
||||
if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
|
||||
if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
|
||||
adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
|
||||
return amdgpu_atomfirmware_asic_init(adev, true);
|
||||
else
|
||||
return amdgpu_atom_asic_init(adev->mode_info.atom_context);
|
||||
|
@ -998,7 +1091,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
|
|||
if (array_size % 3)
|
||||
return;
|
||||
|
||||
for (i = 0; i < array_size; i +=3) {
|
||||
for (i = 0; i < array_size; i += 3) {
|
||||
reg = registers[i + 0];
|
||||
and_mask = registers[i + 1];
|
||||
or_mask = registers[i + 2];
|
||||
|
@ -1090,7 +1183,8 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
|
|||
* doorbells are in the first page. So with paging queue enabled,
|
||||
* the max num_kernel_doorbells should + 1 page (0x400 in dword)
|
||||
*/
|
||||
if (adev->asic_type >= CHIP_VEGA10)
|
||||
if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) &&
|
||||
adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0))
|
||||
adev->doorbell.num_kernel_doorbells += 0x400;
|
||||
}
|
||||
|
||||
|
@ -1291,6 +1385,15 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
|
||||
{
|
||||
if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* GPU helpers function.
|
||||
*/
|
||||
|
@ -1310,6 +1413,9 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
|
|||
if (amdgpu_sriov_vf(adev))
|
||||
return false;
|
||||
|
||||
if (!amdgpu_device_read_bios(adev))
|
||||
return false;
|
||||
|
||||
if (amdgpu_passthrough(adev)) {
|
||||
/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
|
||||
* some old smc fw still need driver do vPost otherwise gpu hang, while
|
||||
|
@ -1547,7 +1653,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
|
|||
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
|
||||
amdgpu_sched_jobs);
|
||||
amdgpu_sched_jobs = 4;
|
||||
} else if (!is_power_of_2(amdgpu_sched_jobs)){
|
||||
} else if (!is_power_of_2(amdgpu_sched_jobs)) {
|
||||
dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
|
||||
amdgpu_sched_jobs);
|
||||
amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
|
||||
|
@ -2194,7 +2300,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
|
|||
total = true;
|
||||
for (i = 0; i < adev->num_ip_blocks; i++) {
|
||||
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
|
||||
DRM_ERROR("disabled ip block: %d <%s>\n",
|
||||
DRM_WARN("disabled ip block: %d <%s>\n",
|
||||
i, adev->ip_blocks[i].version->funcs->name);
|
||||
adev->ip_blocks[i].status.valid = false;
|
||||
} else {
|
||||
|
@ -2220,14 +2326,16 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
|
|||
return r;
|
||||
|
||||
/* Read BIOS */
|
||||
if (!amdgpu_get_bios(adev))
|
||||
return -EINVAL;
|
||||
if (amdgpu_device_read_bios(adev)) {
|
||||
if (!amdgpu_get_bios(adev))
|
||||
return -EINVAL;
|
||||
|
||||
r = amdgpu_atombios_init(adev);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "amdgpu_atombios_init failed\n");
|
||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
|
||||
return r;
|
||||
r = amdgpu_atombios_init(adev);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "amdgpu_atombios_init failed\n");
|
||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
/*get pf2vf msg info at it's earliest time*/
|
||||
|
@ -2376,6 +2484,8 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
|
|||
}
|
||||
}
|
||||
|
||||
amdgpu_xcp_update_partition_sched_list(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2533,8 +2643,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
|
|||
goto init_failed;
|
||||
|
||||
/* Don't init kfd if whole hive need to be reset during init */
|
||||
if (!adev->gmc.xgmi.pending_reset)
|
||||
if (!adev->gmc.xgmi.pending_reset) {
|
||||
kgd2kfd_init_zone_device(adev);
|
||||
amdgpu_amdkfd_device_init(adev);
|
||||
}
|
||||
|
||||
amdgpu_fru_get_product_info(adev);
|
||||
|
||||
|
@ -2759,8 +2871,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
|
|||
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
|
||||
|
||||
/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
|
||||
if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)||
|
||||
adev->asic_type == CHIP_ALDEBARAN ))
|
||||
if (amdgpu_passthrough(adev) &&
|
||||
((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
|
||||
adev->asic_type == CHIP_ALDEBARAN))
|
||||
amdgpu_dpm_handle_passthrough_sbr(adev, true);
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1) {
|
||||
|
@ -3089,7 +3202,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
|
|||
}
|
||||
adev->ip_blocks[i].status.hw = false;
|
||||
/* handle putting the SMC in the appropriate state */
|
||||
if(!amdgpu_sriov_vf(adev)){
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
|
||||
r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
|
||||
if (r) {
|
||||
|
@ -3608,6 +3721,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
adev->smc_wreg = &amdgpu_invalid_wreg;
|
||||
adev->pcie_rreg = &amdgpu_invalid_rreg;
|
||||
adev->pcie_wreg = &amdgpu_invalid_wreg;
|
||||
adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
|
||||
adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
|
||||
adev->pciep_rreg = &amdgpu_invalid_rreg;
|
||||
adev->pciep_wreg = &amdgpu_invalid_wreg;
|
||||
adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
|
||||
|
@ -3633,6 +3748,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
mutex_init(&adev->srbm_mutex);
|
||||
mutex_init(&adev->gfx.pipe_reserve_mutex);
|
||||
mutex_init(&adev->gfx.gfx_off_mutex);
|
||||
mutex_init(&adev->gfx.partition_mutex);
|
||||
mutex_init(&adev->grbm_idx_mutex);
|
||||
mutex_init(&adev->mn_lock);
|
||||
mutex_init(&adev->virt.vf_errors.lock);
|
||||
|
@ -3708,8 +3824,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
|
||||
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
|
||||
|
||||
amdgpu_device_get_pcie_info(adev);
|
||||
|
||||
if (amdgpu_mcbp)
|
||||
DRM_INFO("MCBP is enabled\n");
|
||||
|
||||
|
@ -3725,6 +3839,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
/* detect hw virtualization here */
|
||||
amdgpu_detect_virtualization(adev);
|
||||
|
||||
amdgpu_device_get_pcie_info(adev);
|
||||
|
||||
r = amdgpu_device_get_job_timeout_settings(adev);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
|
||||
|
@ -3753,21 +3869,24 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
}
|
||||
|
||||
/* enable PCIE atomic ops */
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
|
||||
adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
|
||||
(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
if (adev->virt.fw_reserve.p_pf2vf)
|
||||
adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
|
||||
adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
|
||||
(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
|
||||
/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
|
||||
* internal path natively support atomics, set have_atomics_support to true.
|
||||
*/
|
||||
else if ((adev->flags & AMD_IS_APU) &&
|
||||
(adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)))
|
||||
} else if ((adev->flags & AMD_IS_APU) &&
|
||||
(adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
|
||||
adev->have_atomics_support = true;
|
||||
else
|
||||
} else {
|
||||
adev->have_atomics_support =
|
||||
!pci_enable_atomic_ops_to_root(adev->pdev,
|
||||
PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
|
||||
PCI_EXP_DEVCAP2_ATOMIC_COMP64);
|
||||
}
|
||||
|
||||
if (!adev->have_atomics_support)
|
||||
dev_info(adev->dev, "PCIE atomic ops is not supported\n");
|
||||
|
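When neither the SR-IOV pf2vf report nor the APU shortcut applies, the block above falls back to asking the PCI core for AtomicOp routing. Reduced to its core, that check is roughly (the wrapper name is an assumption):

#include <linux/pci.h>

static bool supports_pcie_atomics(struct pci_dev *pdev)
{
	/* 0 means the whole path to the root port honours 32/64-bit AtomicOps */
	return pci_enable_atomic_ops_to_root(pdev,
					     PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
					     PCI_EXP_DEVCAP2_ATOMIC_COMP64) == 0;
}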
||||
|
@@ -3783,7 +3902,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
amdgpu_reset_init(adev);
|
||||
|
||||
/* detect if we are with an SRIOV vbios */
|
||||
amdgpu_device_detect_sriov_bios(adev);
|
||||
if (adev->bios)
|
||||
amdgpu_device_detect_sriov_bios(adev);
|
||||
|
||||
/* check if we need to reset the asic
|
||||
* E.g., driver was not cleanly unloaded previously, etc.
|
||||
|
@ -3835,25 +3955,27 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
}
|
||||
}
|
||||
|
||||
if (adev->is_atom_fw) {
|
||||
/* Initialize clocks */
|
||||
r = amdgpu_atomfirmware_get_clock_info(adev);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
|
||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
|
||||
goto failed;
|
||||
if (adev->bios) {
|
||||
if (adev->is_atom_fw) {
|
||||
/* Initialize clocks */
|
||||
r = amdgpu_atomfirmware_get_clock_info(adev);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
|
||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
|
||||
goto failed;
|
||||
}
|
||||
} else {
|
||||
/* Initialize clocks */
|
||||
r = amdgpu_atombios_get_clock_info(adev);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
|
||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
|
||||
goto failed;
|
||||
}
|
||||
/* init i2c buses */
|
||||
if (!amdgpu_device_has_dc_support(adev))
|
||||
amdgpu_atombios_i2c_init(adev);
|
||||
}
|
||||
} else {
|
||||
/* Initialize clocks */
|
||||
r = amdgpu_atombios_get_clock_info(adev);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
|
||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
|
||||
goto failed;
|
||||
}
|
||||
/* init i2c buses */
|
||||
if (!amdgpu_device_has_dc_support(adev))
|
||||
amdgpu_atombios_i2c_init(adev);
|
||||
}
|
||||
|
||||
fence_driver_init:
|
||||
|
@ -4019,7 +4141,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
|
|||
adev->mman.aper_base_kaddr = NULL;
|
||||
|
||||
/* Memory manager related */
|
||||
if (!adev->gmc.xgmi.connected_to_cpu) {
|
||||
if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
|
||||
arch_phys_wc_del(adev->gmc.vram_mtrr);
|
||||
arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
|
||||
}
|
||||
|
@ -4049,7 +4171,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
|
|||
|
||||
/* disable all interrupts */
|
||||
amdgpu_irq_disable_all(adev);
|
||||
if (adev->mode_info.mode_config_initialized){
|
||||
if (adev->mode_info.mode_config_initialized) {
|
||||
if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
|
||||
drm_helper_force_disable_all(adev_to_drm(adev));
|
||||
else
|
||||
|
@ -4714,42 +4836,42 @@ disabled:
|
|||
|
||||
int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 i;
|
||||
int ret = 0;
|
||||
u32 i;
|
||||
int ret = 0;
|
||||
|
||||
amdgpu_atombios_scratch_regs_engine_hung(adev, true);
|
||||
amdgpu_atombios_scratch_regs_engine_hung(adev, true);
|
||||
|
||||
dev_info(adev->dev, "GPU mode1 reset\n");
|
||||
dev_info(adev->dev, "GPU mode1 reset\n");
|
||||
|
||||
/* disable BM */
|
||||
pci_clear_master(adev->pdev);
|
||||
/* disable BM */
|
||||
pci_clear_master(adev->pdev);
|
||||
|
||||
amdgpu_device_cache_pci_state(adev->pdev);
|
||||
amdgpu_device_cache_pci_state(adev->pdev);
|
||||
|
||||
if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
|
||||
dev_info(adev->dev, "GPU smu mode1 reset\n");
|
||||
ret = amdgpu_dpm_mode1_reset(adev);
|
||||
} else {
|
||||
dev_info(adev->dev, "GPU psp mode1 reset\n");
|
||||
ret = psp_gpu_reset(adev);
|
||||
}
|
||||
if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
|
||||
dev_info(adev->dev, "GPU smu mode1 reset\n");
|
||||
ret = amdgpu_dpm_mode1_reset(adev);
|
||||
} else {
|
||||
dev_info(adev->dev, "GPU psp mode1 reset\n");
|
||||
ret = psp_gpu_reset(adev);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
dev_err(adev->dev, "GPU mode1 reset failed\n");
|
||||
if (ret)
|
||||
dev_err(adev->dev, "GPU mode1 reset failed\n");
|
||||
|
||||
amdgpu_device_load_pci_state(adev->pdev);
|
||||
amdgpu_device_load_pci_state(adev->pdev);
|
||||
|
||||
/* wait for asic to come out of reset */
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
u32 memsize = adev->nbio.funcs->get_memsize(adev);
|
||||
/* wait for asic to come out of reset */
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
u32 memsize = adev->nbio.funcs->get_memsize(adev);
|
||||
|
||||
if (memsize != 0xffffffff)
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
if (memsize != 0xffffffff)
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
|
||||
amdgpu_atombios_scratch_regs_engine_hung(adev, false);
|
||||
return ret;
|
||||
amdgpu_atombios_scratch_regs_engine_hung(adev, false);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
|
||||
|
@ -5478,7 +5600,7 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
|
|||
adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
|
||||
|
||||
/* covers APUs as well */
|
||||
if (pci_is_root_bus(adev->pdev->bus)) {
|
||||
if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
|
||||
if (adev->pm.pcie_gen_mask == 0)
|
||||
adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
|
||||
if (adev->pm.pcie_mlw_mask == 0)
|
||||
|
@ -5959,6 +6081,7 @@ void amdgpu_device_halt(struct amdgpu_device *adev)
|
|||
struct pci_dev *pdev = adev->pdev;
|
||||
struct drm_device *ddev = adev_to_drm(adev);
|
||||
|
||||
amdgpu_xcp_dev_unplug(adev);
|
||||
drm_dev_unplug(ddev);
|
||||
|
||||
amdgpu_irq_disable_all(adev);
|
||||
|
@@ -6079,3 +6202,31 @@ bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
|
|||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
				uint32_t inst, uint32_t reg_addr, char reg_name[],
				uint32_t expected_value, uint32_t mask)
{
	uint32_t ret = 0;
	uint32_t old_ = 0;
	uint32_t tmp_ = RREG32(reg_addr);
	uint32_t loop = adev->usec_timeout;

	while ((tmp_ & (mask)) != (expected_value)) {
		if (old_ != tmp_) {
			loop = adev->usec_timeout;
			old_ = tmp_;
		} else
			udelay(1);
		tmp_ = RREG32(reg_addr);
		loop--;
		if (!loop) {
			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
				  inst, reg_name, (uint32_t)expected_value,
				  (uint32_t)(tmp_ & (mask)));
			ret = -ETIMEDOUT;
			break;
		}
	}
	return ret;
}
|
||||
|
|
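A caller of the new polling helper above would look roughly like this (the register and mask are placeholders, not real amdgpu registers):

static int wait_for_engine_idle(struct amdgpu_device *adev, u32 status_reg)
{
	char name[] = "HYPOTHETICAL_STATUS";

	/* spin until bit 0 of the (made-up) status register reads back as 0 */
	return amdgpu_device_wait_on_rreg(adev, 0, status_reg, name, 0x0, 0x1);
}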
|
@ -30,6 +30,7 @@
|
|||
|
||||
#include "soc15.h"
|
||||
#include "gfx_v9_0.h"
|
||||
#include "gfx_v9_4_3.h"
|
||||
#include "gmc_v9_0.h"
|
||||
#include "df_v1_7.h"
|
||||
#include "df_v3_6.h"
|
||||
|
@ -76,12 +77,15 @@
|
|||
#include "jpeg_v3_0.h"
|
||||
#include "vcn_v4_0.h"
|
||||
#include "jpeg_v4_0.h"
|
||||
#include "vcn_v4_0_3.h"
|
||||
#include "jpeg_v4_0_3.h"
|
||||
#include "amdgpu_vkms.h"
|
||||
#include "mes_v10_1.h"
|
||||
#include "mes_v11_0.h"
|
||||
#include "smuio_v11_0.h"
|
||||
#include "smuio_v11_0_6.h"
|
||||
#include "smuio_v13_0.h"
|
||||
#include "smuio_v13_0_3.h"
|
||||
#include "smuio_v13_0_6.h"
|
||||
|
||||
#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin"
|
||||
|
@ -200,14 +204,44 @@ static int hw_id_map[MAX_HWIP] = {
|
|||
[PCIE_HWIP] = PCIE_HWID,
|
||||
};
|
||||
|
||||
static int amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, uint8_t *binary)
|
||||
static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary)
|
||||
{
|
||||
u64 tmr_offset, tmr_size, pos;
|
||||
void *discv_regn;
|
||||
int ret;
|
||||
|
||||
ret = amdgpu_acpi_get_tmr_info(adev, &tmr_offset, &tmr_size);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET;
|
||||
|
||||
/* This region is read-only and reserved from system use */
|
||||
discv_regn = memremap(pos, adev->mman.discovery_tmr_size, MEMREMAP_WC);
|
||||
if (discv_regn) {
|
||||
memcpy(binary, discv_regn, adev->mman.discovery_tmr_size);
|
||||
memunmap(discv_regn);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
|
||||
uint8_t *binary)
|
||||
{
|
||||
uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
|
||||
uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
|
||||
int ret = 0;
|
||||
|
||||
amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
|
||||
adev->mman.discovery_tmr_size, false);
|
||||
return 0;
|
||||
if (vram_size) {
|
||||
uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
|
||||
amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
|
||||
adev->mman.discovery_tmr_size, false);
|
||||
} else {
|
||||
ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary)
|
||||
|
@ -280,6 +314,7 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
|
|||
case 0xCF:
|
||||
case 0xDF:
|
||||
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
|
||||
adev->vcn.inst_mask &= ~AMDGPU_VCN_HARVEST_VCN1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -301,33 +336,30 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
|
|||
if (!adev->mman.discovery_bin)
|
||||
return -ENOMEM;
|
||||
|
||||
r = amdgpu_discovery_read_binary_from_vram(adev, adev->mman.discovery_bin);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to read ip discovery binary from vram\n");
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin) || amdgpu_discovery == 2) {
|
||||
/* ignore the discovery binary from vram if discovery=2 in kernel module parameter */
|
||||
if (amdgpu_discovery == 2)
|
||||
dev_info(adev->dev,"force read ip discovery binary from file");
|
||||
else
|
||||
dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n");
|
||||
|
||||
/* retry read ip discovery binary from file */
|
||||
/* Read from file if it is the preferred option */
|
||||
if (amdgpu_discovery == 2) {
|
||||
dev_info(adev->dev, "use ip discovery information from file");
|
||||
r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin);
|
||||
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to read ip discovery binary from file\n");
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
/* check the ip discovery binary signature */
|
||||
if(!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) {
|
||||
dev_warn(adev->dev, "get invalid ip discovery binary signature from file\n");
|
||||
r = -EINVAL;
|
||||
|
||||
} else {
|
||||
r = amdgpu_discovery_read_binary_from_mem(
|
||||
adev, adev->mman.discovery_bin);
|
||||
if (r)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* check the ip discovery binary signature */
|
||||
if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) {
|
||||
dev_err(adev->dev,
|
||||
"get invalid ip discovery binary signature\n");
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
bhdr = (struct binary_header *)adev->mman.discovery_bin;
|
||||
|
@ -471,11 +503,11 @@ void amdgpu_discovery_fini(struct amdgpu_device *adev)
|
|||
adev->mman.discovery_bin = NULL;
|
||||
}
|
||||
|
||||
static int amdgpu_discovery_validate_ip(const struct ip *ip)
|
||||
static int amdgpu_discovery_validate_ip(const struct ip_v4 *ip)
|
||||
{
|
||||
if (ip->number_instance >= HWIP_MAX_INSTANCE) {
|
||||
DRM_ERROR("Unexpected number_instance (%d) from ip discovery blob\n",
|
||||
ip->number_instance);
|
||||
if (ip->instance_number >= HWIP_MAX_INSTANCE) {
|
||||
DRM_ERROR("Unexpected instance_number (%d) from ip discovery blob\n",
|
||||
ip->instance_number);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (le16_to_cpu(ip->hw_id) >= HW_ID_MAX) {
|
||||
|
@ -493,7 +525,7 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
|
|||
struct binary_header *bhdr;
|
||||
struct ip_discovery_header *ihdr;
|
||||
struct die_header *dhdr;
|
||||
struct ip *ip;
|
||||
struct ip_v4 *ip;
|
||||
uint16_t die_offset, ip_offset, num_dies, num_ips;
|
||||
int i, j;
|
||||
|
||||
|
@@ -510,29 +542,41 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
ip_offset = die_offset + sizeof(*dhdr);

for (j = 0; j < num_ips; j++) {
ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);

if (amdgpu_discovery_validate_ip(ip))
goto next_ip;

if (le16_to_cpu(ip->harvest) == 1) {
if (le16_to_cpu(ip->variant) == 1) {
switch (le16_to_cpu(ip->hw_id)) {
case VCN_HWID:
(*vcn_harvest_count)++;
if (ip->number_instance == 0)
if (ip->instance_number == 0) {
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
else
adev->vcn.inst_mask &=
~AMDGPU_VCN_HARVEST_VCN0;
adev->jpeg.inst_mask &=
~AMDGPU_VCN_HARVEST_VCN0;
} else {
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
adev->vcn.inst_mask &=
~AMDGPU_VCN_HARVEST_VCN1;
adev->jpeg.inst_mask &=
~AMDGPU_VCN_HARVEST_VCN1;
}
break;
case DMU_HWID:
adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
break;
default:
break;
}
}
}
}
next_ip:
ip_offset += struct_size(ip, base_address, ip->num_base_address);
if (ihdr->base_addr_64_bit)
ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
else
ip_offset += struct_size(ip, base_address, ip->num_base_address);
}
}
}
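Editor's note: the next_ip step above walks a packed blob of variable-length records; a small sketch, under a simplified record layout that is not the real ip_v4 definition, of how the offset advance depends on whether the table carries 32-bit or 64-bit base addresses.

#include <stddef.h>
#include <stdint.h>

struct ip_v4_hdr {
	uint16_t hw_id;
	uint8_t  instance_number;
	uint8_t  num_base_address;
	/* followed in the blob by num_base_address 32-bit or 64-bit base addresses */
};

size_t next_ip_offset(size_t ip_offset, const struct ip_v4_hdr *ip,
		      int base_addr_64_bit)
{
	size_t elem = base_addr_64_bit ? sizeof(uint64_t) : sizeof(uint32_t);

	/* mirrors ip_offset += struct_size(ip, base_address[_64], num_base_address) */
	return ip_offset + sizeof(*ip) + (size_t)ip->num_base_address * elem;
}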
@@ -564,10 +608,15 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
switch (le16_to_cpu(harvest_info->list[i].hw_id)) {
case VCN_HWID:
(*vcn_harvest_count)++;
if (harvest_info->list[i].number_instance == 0)
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
else
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
adev->vcn.harvest_config |=
(1 << harvest_info->list[i].number_instance);
adev->jpeg.harvest_config |=
(1 << harvest_info->list[i].number_instance);

adev->vcn.inst_mask &=
~(1U << harvest_info->list[i].number_instance);
adev->jpeg.inst_mask &=
~(1U << harvest_info->list[i].number_instance);
break;
case DMU_HWID:
adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
@@ -577,6 +626,14 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
1 << (le16_to_cpu(harvest_info->list[i].number_instance));
(*umc_harvest_count)++;
break;
case GC_HWID:
adev->gfx.xcc_mask &=
~(1U << harvest_info->list[i].number_instance);
break;
case SDMA0_HWID:
adev->sdma.sdma_mask &=
~(1U << harvest_info->list[i].number_instance);
break;
default:
break;
}
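Editor's note: the harvest-table cases above keep two views of the same fact, a harvest mask and a mask of usable instances; a minimal sketch of that bookkeeping with simplified field names.

#include <stdint.h>

struct inst_masks {
	uint32_t harvest_config;	/* bit set => instance is harvested (unusable) */
	uint32_t inst_mask;		/* bit set => instance is present and usable   */
};

void mark_harvested(struct inst_masks *m, unsigned int instance)
{
	m->harvest_config |=  (1U << instance);
	m->inst_mask      &= ~(1U << instance);
}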
@@ -836,9 +893,40 @@ static void ip_disc_release(struct kobject *kobj)
kfree(ip_top);
}

static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev,
uint16_t hw_id, uint8_t inst)
{
uint8_t harvest = 0;

/* Until a uniform way is figured, get mask based on hwid */
switch (hw_id) {
case VCN_HWID:
harvest = ((1 << inst) & adev->vcn.inst_mask) == 0;
break;
case DMU_HWID:
if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)
harvest = 0x1;
break;
case UMC_HWID:
/* TODO: It needs another parsing; for now, ignore.*/
break;
case GC_HWID:
harvest = ((1 << inst) & adev->gfx.xcc_mask) == 0;
break;
case SDMA0_HWID:
harvest = ((1 << inst) & adev->sdma.sdma_mask) == 0;
break;
default:
break;
}

return harvest;
}

static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
struct ip_die_entry *ip_die_entry,
const size_t _ip_offset, const int num_ips)
const size_t _ip_offset, const int num_ips,
bool reg_base_64)
{
int ii, jj, kk, res;
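Editor's note: amdgpu_discovery_get_harvest_info() reports an instance as harvested when its bit is absent from the usable-instance mask; spelled out as a predicate (illustrative only):

#include <stdbool.h>
#include <stdint.h>

bool is_harvested(uint32_t inst_mask, unsigned int inst)
{
	return ((1U << inst) & inst_mask) == 0;	/* bit clear => instance was harvested */
}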
@ -852,10 +940,10 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
|
|||
size_t ip_offset = _ip_offset;
|
||||
|
||||
for (jj = 0; jj < num_ips; jj++) {
|
||||
struct ip *ip;
|
||||
struct ip_v4 *ip;
|
||||
struct ip_hw_instance *ip_hw_instance;
|
||||
|
||||
ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
|
||||
ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
|
||||
if (amdgpu_discovery_validate_ip(ip) ||
|
||||
le16_to_cpu(ip->hw_id) != ii)
|
||||
goto next_ip;
|
||||
|
@ -903,22 +991,35 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
|
|||
return -ENOMEM;
|
||||
}
|
||||
ip_hw_instance->hw_id = le16_to_cpu(ip->hw_id); /* == ii */
|
||||
ip_hw_instance->num_instance = ip->number_instance;
|
||||
ip_hw_instance->num_instance = ip->instance_number;
|
||||
ip_hw_instance->major = ip->major;
|
||||
ip_hw_instance->minor = ip->minor;
|
||||
ip_hw_instance->revision = ip->revision;
|
||||
ip_hw_instance->harvest = ip->harvest;
|
||||
ip_hw_instance->harvest =
|
||||
amdgpu_discovery_get_harvest_info(
|
||||
adev, ip_hw_instance->hw_id,
|
||||
ip_hw_instance->num_instance);
|
||||
ip_hw_instance->num_base_addresses = ip->num_base_address;
|
||||
|
||||
for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++)
|
||||
ip_hw_instance->base_addr[kk] = ip->base_address[kk];
|
||||
for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) {
|
||||
if (reg_base_64)
|
||||
ip_hw_instance->base_addr[kk] =
|
||||
lower_32_bits(le64_to_cpu(ip->base_address_64[kk])) & 0x3FFFFFFF;
|
||||
else
|
||||
ip_hw_instance->base_addr[kk] = ip->base_address[kk];
|
||||
}
|
||||
|
||||
kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype);
|
||||
ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset;
|
||||
res = kobject_add(&ip_hw_instance->kobj, NULL,
|
||||
"%d", ip_hw_instance->num_instance);
|
||||
next_ip:
|
||||
ip_offset += struct_size(ip, base_address, ip->num_base_address);
|
||||
if (reg_base_64)
|
||||
ip_offset += struct_size(ip, base_address_64,
|
||||
ip->num_base_address);
|
||||
else
|
||||
ip_offset += struct_size(ip, base_address,
|
||||
ip->num_base_address);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -972,7 +1073,7 @@ static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
|
|||
return res;
|
||||
}
|
||||
|
||||
amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips);
|
||||
amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips, !!ihdr->base_addr_64_bit);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -983,6 +1084,9 @@ static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev)
|
|||
struct kset *die_kset;
|
||||
int res, ii;
|
||||
|
||||
if (!adev->mman.discovery_bin)
|
||||
return -EINVAL;
|
||||
|
||||
adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL);
|
||||
if (!adev->ip_top)
|
||||
return -ENOMEM;
|
||||
|
@ -1082,7 +1186,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
|
|||
struct binary_header *bhdr;
|
||||
struct ip_discovery_header *ihdr;
|
||||
struct die_header *dhdr;
|
||||
struct ip *ip;
|
||||
struct ip_v4 *ip;
|
||||
uint16_t die_offset;
|
||||
uint16_t ip_offset;
|
||||
uint16_t num_dies;
|
||||
|
@ -1098,6 +1202,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
|
|||
return r;
|
||||
}
|
||||
|
||||
adev->gfx.xcc_mask = 0;
|
||||
adev->sdma.sdma_mask = 0;
|
||||
adev->vcn.inst_mask = 0;
|
||||
adev->jpeg.inst_mask = 0;
|
||||
bhdr = (struct binary_header *)adev->mman.discovery_bin;
|
||||
ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
|
||||
le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
|
||||
|
@ -1121,7 +1229,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
|
|||
le16_to_cpu(dhdr->die_id), num_ips);
|
||||
|
||||
for (j = 0; j < num_ips; j++) {
|
||||
ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
|
||||
ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
|
||||
|
||||
if (amdgpu_discovery_validate_ip(ip))
|
||||
goto next_ip;
|
||||
|
@ -1131,7 +1239,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
|
|||
DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
|
||||
hw_id_names[le16_to_cpu(ip->hw_id)],
|
||||
le16_to_cpu(ip->hw_id),
|
||||
ip->number_instance,
|
||||
ip->instance_number,
|
||||
ip->major, ip->minor,
|
||||
ip->revision);
|
||||
|
||||
|
@ -1145,23 +1253,33 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
|
|||
adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
|
||||
ip->revision & 0xc0;
|
||||
ip->revision &= ~0xc0;
|
||||
if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES)
|
||||
if (adev->vcn.num_vcn_inst <
|
||||
AMDGPU_MAX_VCN_INSTANCES) {
|
||||
adev->vcn.num_vcn_inst++;
|
||||
else
|
||||
adev->vcn.inst_mask |=
|
||||
(1U << ip->instance_number);
|
||||
adev->jpeg.inst_mask |=
|
||||
(1U << ip->instance_number);
|
||||
} else {
|
||||
dev_err(adev->dev, "Too many VCN instances: %d vs %d\n",
|
||||
adev->vcn.num_vcn_inst + 1,
|
||||
AMDGPU_MAX_VCN_INSTANCES);
|
||||
}
|
||||
}
|
||||
if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
|
||||
le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
|
||||
le16_to_cpu(ip->hw_id) == SDMA2_HWID ||
|
||||
le16_to_cpu(ip->hw_id) == SDMA3_HWID) {
|
||||
if (adev->sdma.num_instances < AMDGPU_MAX_SDMA_INSTANCES)
|
||||
if (adev->sdma.num_instances <
|
||||
AMDGPU_MAX_SDMA_INSTANCES) {
|
||||
adev->sdma.num_instances++;
|
||||
else
|
||||
adev->sdma.sdma_mask |=
|
||||
(1U << ip->instance_number);
|
||||
} else {
|
||||
dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n",
|
||||
adev->sdma.num_instances + 1,
|
||||
AMDGPU_MAX_SDMA_INSTANCES);
|
||||
}
|
||||
}
|
||||
|
||||
if (le16_to_cpu(ip->hw_id) == UMC_HWID) {
|
||||
|
@@ -1169,20 +1287,38 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
adev->umc.node_inst_num++;
}

if (le16_to_cpu(ip->hw_id) == GC_HWID)
adev->gfx.xcc_mask |=
(1U << ip->instance_number);

for (k = 0; k < num_base_address; k++) {
/*
* convert the endianness of base addresses in place,
* so that we don't need to convert them when accessing adev->reg_offset.
*/
ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
if (ihdr->base_addr_64_bit)
/* Truncate the 64bit base address from ip discovery
* and only store lower 32bit ip base in reg_offset[].
* Bits > 32 follows ASIC specific format, thus just
* discard them and handle it within specific ASIC.
* By this way reg_offset[] and related helpers can
* stay unchanged.
* The base address is in dwords, thus clear the
* highest 2 bits to store.
*/
ip->base_address[k] =
lower_32_bits(le64_to_cpu(ip->base_address_64[k])) & 0x3FFFFFFF;
else
ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
}

for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) {
if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id) &&
hw_id_map[hw_ip] != 0) {
DRM_DEBUG("set register base offset for %s\n",
hw_id_names[le16_to_cpu(ip->hw_id)]);
adev->reg_offset[hw_ip][ip->number_instance] =
adev->reg_offset[hw_ip][ip->instance_number] =
ip->base_address;
/* Instance support is somewhat inconsistent.
* SDMA is a good example. Sienna cichlid has 4 total
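Editor's note: a one-function sketch of the truncation performed above; only the low 32 bits of the 64-bit discovery base address are kept for reg_offset[], and because the stored value is a dword base the top two bits of that word are cleared as well.

#include <stdint.h>

uint32_t reg_base_from_64(uint64_t base_address_64)
{
	/* keep the low 32 bits, then drop the highest 2 bits of the dword base */
	return (uint32_t)(base_address_64 & 0xFFFFFFFFu) & 0x3FFFFFFFu;
}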
@ -1193,69 +1329,22 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
|
|||
* example. On most chips there are multiple instances
|
||||
* with the same HWID.
|
||||
*/
|
||||
adev->ip_versions[hw_ip][ip->number_instance] =
|
||||
adev->ip_versions[hw_ip][ip->instance_number] =
|
||||
IP_VERSION(ip->major, ip->minor, ip->revision);
|
||||
}
|
||||
}
|
||||
|
||||
next_ip:
|
||||
ip_offset += struct_size(ip, base_address, ip->num_base_address);
|
||||
if (ihdr->base_addr_64_bit)
|
||||
ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
|
||||
else
|
||||
ip_offset += struct_size(ip, base_address, ip->num_base_address);
|
||||
}
|
||||
}
|
||||
|
||||
amdgpu_discovery_sysfs_init(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
|
||||
int *major, int *minor, int *revision)
|
||||
{
|
||||
struct binary_header *bhdr;
|
||||
struct ip_discovery_header *ihdr;
|
||||
struct die_header *dhdr;
|
||||
struct ip *ip;
|
||||
uint16_t die_offset;
|
||||
uint16_t ip_offset;
|
||||
uint16_t num_dies;
|
||||
uint16_t num_ips;
|
||||
int i, j;
|
||||
|
||||
if (!adev->mman.discovery_bin) {
|
||||
DRM_ERROR("ip discovery uninitialized\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
bhdr = (struct binary_header *)adev->mman.discovery_bin;
|
||||
ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
|
||||
le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
|
||||
num_dies = le16_to_cpu(ihdr->num_dies);
|
||||
|
||||
for (i = 0; i < num_dies; i++) {
|
||||
die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
|
||||
dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
|
||||
num_ips = le16_to_cpu(dhdr->num_ips);
|
||||
ip_offset = die_offset + sizeof(*dhdr);
|
||||
|
||||
for (j = 0; j < num_ips; j++) {
|
||||
ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
|
||||
|
||||
if ((le16_to_cpu(ip->hw_id) == hw_id) && (ip->number_instance == number_instance)) {
|
||||
if (major)
|
||||
*major = ip->major;
|
||||
if (minor)
|
||||
*minor = ip->minor;
|
||||
if (revision)
|
||||
*revision = ip->revision;
|
||||
return 0;
|
||||
}
|
||||
ip_offset += struct_size(ip, base_address, ip->num_base_address);
|
||||
}
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
|
||||
{
|
||||
int vcn_harvest_count = 0;
|
||||
|
@ -1266,7 +1355,8 @@ static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
|
|||
* so read harvest bit per IP data structure to set
|
||||
* harvest configuration.
|
||||
*/
|
||||
if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 2, 0)) {
|
||||
if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 2, 0) &&
|
||||
adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) {
|
||||
if ((adev->pdev->device == 0x731E &&
|
||||
(adev->pdev->revision == 0xC6 ||
|
||||
adev->pdev->revision == 0xC7)) ||
|
||||
|
@ -1706,6 +1796,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
|
|||
case IP_VERSION(13, 0, 3):
|
||||
case IP_VERSION(13, 0, 4):
|
||||
case IP_VERSION(13, 0, 5):
|
||||
case IP_VERSION(13, 0, 6):
|
||||
case IP_VERSION(13, 0, 7):
|
||||
case IP_VERSION(13, 0, 8):
|
||||
case IP_VERSION(13, 0, 10):
|
||||
|
@ -1804,6 +1895,9 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
|
|||
case IP_VERSION(9, 4, 2):
|
||||
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
|
||||
break;
|
||||
case IP_VERSION(9, 4, 3):
|
||||
amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
|
||||
break;
|
||||
case IP_VERSION(10, 1, 10):
|
||||
case IP_VERSION(10, 1, 2):
|
||||
case IP_VERSION(10, 1, 1):
|
||||
|
@ -1939,7 +2033,6 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
|
|||
case IP_VERSION(3, 1, 1):
|
||||
case IP_VERSION(3, 1, 2):
|
||||
case IP_VERSION(3, 0, 2):
|
||||
case IP_VERSION(3, 0, 192):
|
||||
amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
|
||||
if (!amdgpu_sriov_vf(adev))
|
||||
amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
|
||||
|
@ -1952,7 +2045,11 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
|
|||
case IP_VERSION(4, 0, 4):
|
||||
amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
|
||||
return 0;
|
||||
break;
|
||||
case IP_VERSION(4, 0, 3):
|
||||
amdgpu_device_ip_block_add(adev, &vcn_v4_0_3_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block);
|
||||
break;
|
||||
default:
|
||||
dev_err(adev->dev,
|
||||
"Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
|
||||
|
@ -2000,6 +2097,17 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
|
||||
{
|
||||
switch (adev->ip_versions[GC_HWIP][0]) {
|
||||
case IP_VERSION(9, 4, 3):
|
||||
aqua_vanjaram_init_soc_config(adev);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
|
@ -2177,6 +2285,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
|
|||
break;
|
||||
}
|
||||
|
||||
amdgpu_discovery_init_soc_config(adev);
|
||||
amdgpu_discovery_sysfs_init(adev);
|
||||
|
||||
switch (adev->ip_versions[GC_HWIP][0]) {
|
||||
case IP_VERSION(9, 0, 1):
|
||||
case IP_VERSION(9, 2, 1):
|
||||
|
@ -2387,6 +2498,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
|
|||
case IP_VERSION(13, 0, 2):
|
||||
adev->smuio.funcs = &smuio_v13_0_funcs;
|
||||
break;
|
||||
case IP_VERSION(13, 0, 3):
|
||||
adev->smuio.funcs = &smuio_v13_0_3_funcs;
|
||||
if (adev->smuio.funcs->get_pkg_type(adev) == AMDGPU_PKG_TYPE_APU) {
|
||||
adev->flags |= AMD_IS_APU;
|
||||
}
|
||||
break;
|
||||
case IP_VERSION(13, 0, 6):
|
||||
case IP_VERSION(13, 0, 8):
|
||||
adev->smuio.funcs = &smuio_v13_0_6_funcs;
|
||||
|
|
|
@ -24,12 +24,10 @@
|
|||
#ifndef __AMDGPU_DISCOVERY__
|
||||
#define __AMDGPU_DISCOVERY__
|
||||
|
||||
#define DISCOVERY_TMR_SIZE (4 << 10)
|
||||
#define DISCOVERY_TMR_SIZE (8 << 10)
|
||||
#define DISCOVERY_TMR_OFFSET (64 << 10)
|
||||
|
||||
void amdgpu_discovery_fini(struct amdgpu_device *adev);
|
||||
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
|
||||
int *major, int *minor, int *revision);
|
||||
int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
|
||||
|
||||
#endif /* __AMDGPU_DISCOVERY__ */
|
||||
|
|
|
@ -98,7 +98,7 @@ static void amdgpu_display_flip_callback(struct dma_fence *f,
|
|||
static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work,
|
||||
struct dma_fence **f)
|
||||
{
|
||||
struct dma_fence *fence= *f;
|
||||
struct dma_fence *fence = *f;
|
||||
|
||||
if (fence == NULL)
|
||||
return false;
|
||||
|
@ -1252,21 +1252,21 @@ const struct drm_mode_config_funcs amdgpu_mode_funcs = {
|
|||
.fb_create = amdgpu_display_user_framebuffer_create,
|
||||
};
|
||||
|
||||
static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] =
|
||||
{ { UNDERSCAN_OFF, "off" },
|
||||
static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = {
|
||||
{ UNDERSCAN_OFF, "off" },
|
||||
{ UNDERSCAN_ON, "on" },
|
||||
{ UNDERSCAN_AUTO, "auto" },
|
||||
};
|
||||
|
||||
static const struct drm_prop_enum_list amdgpu_audio_enum_list[] =
|
||||
{ { AMDGPU_AUDIO_DISABLE, "off" },
|
||||
static const struct drm_prop_enum_list amdgpu_audio_enum_list[] = {
|
||||
{ AMDGPU_AUDIO_DISABLE, "off" },
|
||||
{ AMDGPU_AUDIO_ENABLE, "on" },
|
||||
{ AMDGPU_AUDIO_AUTO, "auto" },
|
||||
};
|
||||
|
||||
/* XXX support different dither options? spatial, temporal, both, etc. */
|
||||
static const struct drm_prop_enum_list amdgpu_dither_enum_list[] =
|
||||
{ { AMDGPU_FMT_DITHER_DISABLE, "off" },
|
||||
static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = {
|
||||
{ AMDGPU_FMT_DITHER_DISABLE, "off" },
|
||||
{ AMDGPU_FMT_DITHER_ENABLE, "on" },
|
||||
};
|
||||
|
||||
|
@ -1496,8 +1496,7 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
|
|||
ret |= DRM_SCANOUTPOS_ACCURATE;
|
||||
vbl_start = vbl & 0x1fff;
|
||||
vbl_end = (vbl >> 16) & 0x1fff;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
/* No: Fake something reasonable which gives at least ok results. */
|
||||
vbl_start = mode->crtc_vdisplay;
|
||||
vbl_end = 0;
|
||||
|
|
|
@ -149,7 +149,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
|
|||
if (!bo->tbo.pin_count) {
|
||||
/* move buffer into GTT or VRAM */
|
||||
struct ttm_operation_ctx ctx = { false, false };
|
||||
unsigned domains = AMDGPU_GEM_DOMAIN_GTT;
|
||||
unsigned int domains = AMDGPU_GEM_DOMAIN_GTT;
|
||||
|
||||
if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
|
||||
attach->peer2peer) {
|
||||
|
@ -336,7 +336,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
|
|||
|
||||
ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_CPU, flags,
|
||||
ttm_bo_type_sg, resv, &gobj);
|
||||
ttm_bo_type_sg, resv, &gobj, 0);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ struct amdgpu_doorbell_index {
|
|||
uint32_t gfx_ring1;
|
||||
uint32_t gfx_userqueue_start;
|
||||
uint32_t gfx_userqueue_end;
|
||||
uint32_t sdma_engine[8];
|
||||
uint32_t sdma_engine[16];
|
||||
uint32_t mes_ring0;
|
||||
uint32_t mes_ring1;
|
||||
uint32_t ih;
|
||||
|
@ -86,6 +86,8 @@ struct amdgpu_doorbell_index {
|
|||
uint32_t max_assignment;
|
||||
/* Per engine SDMA doorbell size in dword */
|
||||
uint32_t sdma_doorbell_range;
|
||||
/* Per xcc doorbell size for KIQ/KCQ */
|
||||
uint32_t xcc_doorbell_range;
|
||||
};
|
||||
|
||||
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
|
||||
|
@ -164,7 +166,15 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
|
|||
AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0,
|
||||
AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
|
||||
|
||||
AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F,
|
||||
/* kiq/kcq from second XCD. Max 8 XCDs */
|
||||
AMDGPU_VEGA20_DOORBELL_XCC1_KIQ_START = 0x190,
|
||||
/* 8 compute rings per GC. Max to 0x1CE */
|
||||
AMDGPU_VEGA20_DOORBELL_XCC1_MEC_RING0_START = 0x197,
|
||||
|
||||
/* AID1 SDMA: 0x1D0 ~ 0x1F7 */
|
||||
AMDGPU_VEGA20_DOORBELL_AID1_sDMA_START = 0x1D0,
|
||||
|
||||
AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x1F7,
|
||||
AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
|
||||
} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
|
||||
|
||||
|
@ -301,6 +311,36 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
|
|||
AMDGPU_DOORBELL64_INVALID = 0xFFFF
|
||||
} AMDGPU_DOORBELL64_ASSIGNMENT;
|
||||
|
||||
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
|
||||
/* XCC0: 0x00 ~20, XCC1: 20 ~ 2F ... */
|
||||
|
||||
/* KIQ/HIQ/DIQ */
|
||||
AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000,
|
||||
AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x001,
|
||||
AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x002,
|
||||
/* Compute: 0x08 ~ 0x20 */
|
||||
AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x008,
|
||||
AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x00F,
|
||||
AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x010,
|
||||
AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x01F,
|
||||
AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE = 0x020,
|
||||
|
||||
/* SDMA: 0x100 ~ 0x19F */
|
||||
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100,
|
||||
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
|
||||
/* IH: 0x1A0 ~ 0x1AF */
|
||||
AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0,
|
||||
/* VCN: 0x1B0 ~ 0x1D4 */
|
||||
AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0,
|
||||
AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4,
|
||||
|
||||
AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
|
||||
AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END,
|
||||
|
||||
AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4,
|
||||
AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF
|
||||
} AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1;
|
||||
|
||||
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
|
||||
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
|
||||
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
|
||||
|
|
|
@ -50,6 +50,7 @@
|
|||
#include "amdgpu_ras.h"
|
||||
#include "amdgpu_xgmi.h"
|
||||
#include "amdgpu_reset.h"
|
||||
#include "../amdxcp/amdgpu_xcp_drv.h"
|
||||
|
||||
/*
|
||||
* KMS wrapper.
|
||||
|
@ -110,9 +111,11 @@
|
|||
* 3.52.0 - Add AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD, add device_info fields:
|
||||
* tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, sqc_inst_cache_size,
|
||||
* gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi
|
||||
* 3.53.0 - Support for GFX11 CP GFX shadowing
|
||||
* 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
|
||||
*/
|
||||
#define KMS_DRIVER_MAJOR 3
|
||||
#define KMS_DRIVER_MINOR 52
|
||||
#define KMS_DRIVER_MINOR 54
|
||||
#define KMS_DRIVER_PATCHLEVEL 0
|
||||
|
||||
unsigned int amdgpu_vram_limit = UINT_MAX;
|
||||
|
@ -150,7 +153,7 @@ uint amdgpu_pg_mask = 0xffffffff;
|
|||
uint amdgpu_sdma_phase_quantum = 32;
|
||||
char *amdgpu_disable_cu;
|
||||
char *amdgpu_virtual_display;
|
||||
|
||||
bool enforce_isolation;
|
||||
/*
|
||||
* OverDrive(bit 14) disabled by default
|
||||
* GFX DCS(bit 19) disabled by default
|
||||
|
@ -191,6 +194,7 @@ int amdgpu_smartshift_bias;
|
|||
int amdgpu_use_xgmi_p2p = 1;
|
||||
int amdgpu_vcnfw_log;
|
||||
int amdgpu_sg_display = -1; /* auto */
|
||||
int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
|
||||
|
||||
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
|
||||
|
||||
|
@ -819,6 +823,13 @@ MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (
|
|||
module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* DOC: mtype_local (int)
|
||||
*/
|
||||
int amdgpu_mtype_local;
|
||||
MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)");
|
||||
module_param_named(mtype_local, amdgpu_mtype_local, int, 0444);
|
||||
|
||||
/**
|
||||
* DOC: pcie_p2p (bool)
|
||||
* Enable PCIe P2P (requires large-BAR). Default value: true (on)
|
||||
|
@ -948,6 +959,28 @@ MODULE_PARM_DESC(smu_pptable_id,
|
|||
"specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)");
|
||||
module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444);
|
||||
|
||||
/**
|
||||
* DOC: partition_mode (int)
|
||||
* Used to override the default SPX mode.
|
||||
*/
|
||||
MODULE_PARM_DESC(
|
||||
user_partt_mode,
|
||||
"specify partition mode to be used (-2 = AMDGPU_AUTO_COMPUTE_PARTITION_MODE(default value) \
|
||||
0 = AMDGPU_SPX_PARTITION_MODE, \
|
||||
1 = AMDGPU_DPX_PARTITION_MODE, \
|
||||
2 = AMDGPU_TPX_PARTITION_MODE, \
|
||||
3 = AMDGPU_QPX_PARTITION_MODE, \
|
||||
4 = AMDGPU_CPX_PARTITION_MODE)");
|
||||
module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
|
||||
|
||||
|
||||
/**
|
||||
* DOC: enforce_isolation (bool)
|
||||
* enforce process isolation between graphics and compute via using the same reserved vmid.
|
||||
*/
|
||||
module_param(enforce_isolation, bool, 0444);
|
||||
MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
|
||||
|
||||
/* These devices are not supported by amdgpu.
|
||||
* They are supported by the mach64, r128, radeon drivers
|
||||
*/
|
||||
|
@ -1615,6 +1648,7 @@ static const u16 amdgpu_unsupported_pciidlist[] = {
|
|||
0x5874,
|
||||
0x5940,
|
||||
0x5941,
|
||||
0x5b70,
|
||||
0x5b72,
|
||||
0x5b73,
|
||||
0x5b74,
|
||||
|
@ -1660,7 +1694,7 @@ static const u16 amdgpu_unsupported_pciidlist[] = {
|
|||
};
|
||||
|
||||
static const struct pci_device_id pciidlist[] = {
|
||||
#ifdef CONFIG_DRM_AMDGPU_SI
|
||||
#ifdef CONFIG_DRM_AMDGPU_SI
|
||||
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
|
||||
{0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
|
||||
{0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
|
||||
|
@ -2017,6 +2051,11 @@ static const struct pci_device_id pciidlist[] = {
|
|||
.class_mask = 0xffffff,
|
||||
.driver_data = CHIP_IP_DISCOVERY },
|
||||
|
||||
{ PCI_DEVICE(0x1002, PCI_ANY_ID),
|
||||
.class = PCI_CLASS_ACCELERATOR_PROCESSING << 8,
|
||||
.class_mask = 0xffffff,
|
||||
.driver_data = CHIP_IP_DISCOVERY },
|
||||
|
||||
{0, 0, 0}
|
||||
};
|
||||
|
||||
|
@ -2161,6 +2200,10 @@ retry_init:
|
|||
goto err_pci;
|
||||
}
|
||||
|
||||
ret = amdgpu_xcp_dev_register(adev, ent);
|
||||
if (ret)
|
||||
goto err_pci;
|
||||
|
||||
/*
|
||||
* 1. don't init fbdev on hw without DCE
|
||||
* 2. don't init fbdev if there are no connectors
|
||||
|
@ -2233,6 +2276,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
|
|||
struct drm_device *dev = pci_get_drvdata(pdev);
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
|
||||
amdgpu_xcp_dev_unplug(adev);
|
||||
drm_dev_unplug(dev);
|
||||
|
||||
if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
|
||||
|
@ -2819,6 +2863,33 @@ static const struct drm_driver amdgpu_kms_driver = {
|
|||
.patchlevel = KMS_DRIVER_PATCHLEVEL,
|
||||
};
|
||||
|
||||
const struct drm_driver amdgpu_partition_driver = {
|
||||
.driver_features =
|
||||
DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ |
|
||||
DRIVER_SYNCOBJ_TIMELINE,
|
||||
.open = amdgpu_driver_open_kms,
|
||||
.postclose = amdgpu_driver_postclose_kms,
|
||||
.lastclose = amdgpu_driver_lastclose_kms,
|
||||
.ioctls = amdgpu_ioctls_kms,
|
||||
.num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
|
||||
.dumb_create = amdgpu_mode_dumb_create,
|
||||
.dumb_map_offset = amdgpu_mode_dumb_mmap,
|
||||
.fops = &amdgpu_driver_kms_fops,
|
||||
.release = &amdgpu_driver_release_kms,
|
||||
|
||||
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
|
||||
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
|
||||
.gem_prime_import = amdgpu_gem_prime_import,
|
||||
.gem_prime_mmap = drm_gem_prime_mmap,
|
||||
|
||||
.name = DRIVER_NAME,
|
||||
.desc = DRIVER_DESC,
|
||||
.date = DRIVER_DATE,
|
||||
.major = KMS_DRIVER_MAJOR,
|
||||
.minor = KMS_DRIVER_MINOR,
|
||||
.patchlevel = KMS_DRIVER_PATCHLEVEL,
|
||||
};
|
||||
|
||||
static struct pci_error_handlers amdgpu_pci_err_handler = {
|
||||
.error_detected = amdgpu_pci_error_detected,
|
||||
.mmio_enabled = amdgpu_pci_mmio_enabled,
|
||||
|
@ -2886,9 +2957,11 @@ static void __exit amdgpu_exit(void)
|
|||
amdgpu_amdkfd_fini();
|
||||
pci_unregister_driver(&amdgpu_kms_pci_driver);
|
||||
amdgpu_unregister_atpx_handler();
|
||||
amdgpu_acpi_release();
|
||||
amdgpu_sync_fini();
|
||||
amdgpu_fence_slab_fini();
|
||||
mmu_notifier_synchronize();
|
||||
amdgpu_xcp_drv_release();
|
||||
}
|
||||
|
||||
module_init(amdgpu_init);
|
||||
|
|
|
@ -42,6 +42,8 @@
|
|||
#define DRIVER_DESC "AMD GPU"
|
||||
#define DRIVER_DATE "20150101"
|
||||
|
||||
extern const struct drm_driver amdgpu_partition_driver;
|
||||
|
||||
long amdgpu_drm_ioctl(struct file *filp,
|
||||
unsigned int cmd, unsigned long arg);
|
||||
|
||||
|
|
|
@ -70,6 +70,7 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
|
|||
drm_for_each_connector_iter(connector, &iter) {
|
||||
if (connector->encoder == encoder) {
|
||||
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
|
||||
|
||||
amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices;
|
||||
DRM_DEBUG_KMS("setting active device to %08x from %08x %08x for encoder %d\n",
|
||||
amdgpu_encoder->active_device, amdgpu_encoder->devices,
|
||||
|
@ -165,12 +166,12 @@ void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
|
|||
{
|
||||
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
|
||||
struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
|
||||
unsigned hblank = native_mode->htotal - native_mode->hdisplay;
|
||||
unsigned vblank = native_mode->vtotal - native_mode->vdisplay;
|
||||
unsigned hover = native_mode->hsync_start - native_mode->hdisplay;
|
||||
unsigned vover = native_mode->vsync_start - native_mode->vdisplay;
|
||||
unsigned hsync_width = native_mode->hsync_end - native_mode->hsync_start;
|
||||
unsigned vsync_width = native_mode->vsync_end - native_mode->vsync_start;
|
||||
unsigned int hblank = native_mode->htotal - native_mode->hdisplay;
|
||||
unsigned int vblank = native_mode->vtotal - native_mode->vdisplay;
|
||||
unsigned int hover = native_mode->hsync_start - native_mode->hdisplay;
|
||||
unsigned int vover = native_mode->vsync_start - native_mode->vdisplay;
|
||||
unsigned int hsync_width = native_mode->hsync_end - native_mode->hsync_start;
|
||||
unsigned int vsync_width = native_mode->vsync_end - native_mode->vsync_start;
|
||||
|
||||
adjusted_mode->clock = native_mode->clock;
|
||||
adjusted_mode->flags = native_mode->flags;
|
||||
|
|
|
@ -42,7 +42,6 @@
|
|||
#include "amdgpu_reset.h"
|
||||
|
||||
/*
|
||||
* Fences
|
||||
* Fences mark an event in the GPUs pipeline and are used
|
||||
* for GPU/CPU synchronization. When the fence is written,
|
||||
* it is expected that all buffers associated with that fence
|
||||
|
@ -140,7 +139,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
|
|||
* Returns 0 on success, -ENOMEM on failure.
|
||||
*/
|
||||
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job,
|
||||
unsigned flags)
|
||||
unsigned int flags)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
struct dma_fence *fence;
|
||||
|
@ -174,11 +173,11 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
|
|||
adev->fence_context + ring->idx, seq);
|
||||
/* Against remove in amdgpu_job_{free, free_cb} */
|
||||
dma_fence_get(fence);
|
||||
}
|
||||
else
|
||||
} else {
|
||||
dma_fence_init(fence, &amdgpu_fence_ops,
|
||||
&ring->fence_drv.lock,
|
||||
adev->fence_context + ring->idx, seq);
|
||||
}
|
||||
}
|
||||
|
||||
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
|
||||
|
@@ -377,14 +376,11 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
signed long timeout)
{
uint32_t seq;

do {
seq = amdgpu_fence_read(ring);
udelay(5);
timeout -= 5;
} while ((int32_t)(wait_seq - seq) > 0 && timeout > 0);

while ((int32_t)(wait_seq - amdgpu_fence_read(ring)) > 0 && timeout > 0) {
udelay(2);
timeout -= 2;
}
return timeout > 0 ? timeout : 0;
}
/**
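Editor's note: the rewritten polling loop keeps the signed-subtraction comparison, which stays correct when the 32-bit fence counter wraps around; a standalone illustration of that test.

#include <stdbool.h>
#include <stdint.h>

bool seq_still_ahead(uint32_t wait_seq, uint32_t current_seq)
{
	/* true while wait_seq has not been signalled yet, even across a wrap */
	return (int32_t)(wait_seq - current_seq) > 0;
}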
@ -396,7 +392,7 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
|
|||
* Returns the number of emitted fences on the ring. Used by the
|
||||
* dynpm code to ring track activity.
|
||||
*/
|
||||
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
|
||||
unsigned int amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
|
||||
{
|
||||
uint64_t emitted;
|
||||
|
||||
|
@ -475,7 +471,7 @@ void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
|
|||
*/
|
||||
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
|
||||
struct amdgpu_irq_src *irq_src,
|
||||
unsigned irq_type)
|
||||
unsigned int irq_type)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint64_t index;
|
||||
|
@ -582,7 +578,8 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
|
|||
if (r)
|
||||
amdgpu_fence_driver_force_completion(ring);
|
||||
|
||||
if (ring->fence_drv.irq_src)
|
||||
if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
|
||||
ring->fence_drv.irq_src)
|
||||
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
|
||||
ring->fence_drv.irq_type);
|
||||
|
||||
|
@ -653,6 +650,7 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
|
|||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
|
||||
if (!ring || !ring->fence_drv.initialized)
|
||||
continue;
|
||||
|
||||
|
@ -835,11 +833,12 @@ static const struct dma_fence_ops amdgpu_job_fence_ops = {
|
|||
#if defined(CONFIG_DEBUG_FS)
|
||||
static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
|
||||
struct amdgpu_device *adev = m->private;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
|
||||
if (!ring || !ring->fence_drv.initialized)
|
||||
continue;
|
||||
|
||||
|
@ -913,6 +912,7 @@ static void amdgpu_debugfs_reset_work(struct work_struct *work)
|
|||
reset_work);
|
||||
|
||||
struct amdgpu_reset_context reset_context;
|
||||
|
||||
memset(&reset_context, 0, sizeof(reset_context));
|
||||
|
||||
reset_context.method = AMD_RESET_METHOD_NONE;
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#endif
|
||||
#include "amdgpu.h"
|
||||
#include <drm/drm_drv.h>
|
||||
#include <drm/ttm/ttm_tt.h>
|
||||
|
||||
/*
|
||||
* GART
|
||||
|
@ -102,6 +103,142 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
|
|||
adev->dummy_page_addr = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Allocate system memory for GART page table for ASICs that don't have
|
||||
* dedicated VRAM.
|
||||
* Returns 0 for success, error for failure.
|
||||
*/
|
||||
int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
|
||||
{
|
||||
unsigned int order = get_order(adev->gart.table_size);
|
||||
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
|
||||
struct amdgpu_bo *bo = NULL;
|
||||
struct sg_table *sg = NULL;
|
||||
struct amdgpu_bo_param bp;
|
||||
dma_addr_t dma_addr;
|
||||
struct page *p;
|
||||
int ret;
|
||||
|
||||
if (adev->gart.bo != NULL)
|
||||
return 0;
|
||||
|
||||
p = alloc_pages(gfp_flags, order);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
|
||||
/* If the hardware does not support UTCL2 snooping of the CPU caches
|
||||
* then set_memory_wc() could be used as a workaround to mark the pages
|
||||
* as write combine memory.
|
||||
*/
|
||||
dma_addr = dma_map_page(&adev->pdev->dev, p, 0, adev->gart.table_size,
|
||||
DMA_BIDIRECTIONAL);
|
||||
if (dma_mapping_error(&adev->pdev->dev, dma_addr)) {
|
||||
dev_err(&adev->pdev->dev, "Failed to DMA MAP the GART BO page\n");
|
||||
__free_pages(p, order);
|
||||
p = NULL;
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
dev_info(adev->dev, "%s dma_addr:%pad\n", __func__, &dma_addr);
|
||||
/* Create SG table */
|
||||
sg = kmalloc(sizeof(*sg), GFP_KERNEL);
|
||||
if (!sg) {
|
||||
ret = -ENOMEM;
|
||||
goto error;
|
||||
}
|
||||
ret = sg_alloc_table(sg, 1, GFP_KERNEL);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
sg_dma_address(sg->sgl) = dma_addr;
|
||||
sg->sgl->length = adev->gart.table_size;
|
||||
#ifdef CONFIG_NEED_SG_DMA_LENGTH
|
||||
sg->sgl->dma_length = adev->gart.table_size;
|
||||
#endif
|
||||
/* Create SG BO */
|
||||
memset(&bp, 0, sizeof(bp));
|
||||
bp.size = adev->gart.table_size;
|
||||
bp.byte_align = PAGE_SIZE;
|
||||
bp.domain = AMDGPU_GEM_DOMAIN_CPU;
|
||||
bp.type = ttm_bo_type_sg;
|
||||
bp.resv = NULL;
|
||||
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
|
||||
bp.flags = 0;
|
||||
ret = amdgpu_bo_create(adev, &bp, &bo);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
bo->tbo.sg = sg;
|
||||
bo->tbo.ttm->sg = sg;
|
||||
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
|
||||
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
|
||||
|
||||
ret = amdgpu_bo_reserve(bo, true);
|
||||
if (ret) {
|
||||
dev_err(adev->dev, "(%d) failed to reserve bo for GART system bo\n", ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
|
||||
WARN(ret, "Pinning the GART table failed");
|
||||
if (ret)
|
||||
goto error_resv;
|
||||
|
||||
adev->gart.bo = bo;
|
||||
adev->gart.ptr = page_to_virt(p);
|
||||
/* Make GART table accessible in VMID0 */
|
||||
ret = amdgpu_ttm_alloc_gart(&adev->gart.bo->tbo);
|
||||
if (ret)
|
||||
amdgpu_gart_table_ram_free(adev);
|
||||
amdgpu_bo_unreserve(bo);
|
||||
|
||||
return 0;
|
||||
|
||||
error_resv:
|
||||
amdgpu_bo_unreserve(bo);
|
||||
error:
|
||||
amdgpu_bo_unref(&bo);
|
||||
if (sg) {
|
||||
sg_free_table(sg);
|
||||
kfree(sg);
|
||||
}
|
||||
__free_pages(p, order);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gart_table_ram_free - free gart page table system ram
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Free the system memory used for the GART page table on ASICs that don't
|
||||
* have dedicated VRAM.
|
||||
*/
|
||||
void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
|
||||
{
|
||||
unsigned int order = get_order(adev->gart.table_size);
|
||||
struct sg_table *sg = adev->gart.bo->tbo.sg;
|
||||
struct page *p;
|
||||
int ret;
|
||||
|
||||
ret = amdgpu_bo_reserve(adev->gart.bo, false);
|
||||
if (!ret) {
|
||||
amdgpu_bo_unpin(adev->gart.bo);
|
||||
amdgpu_bo_unreserve(adev->gart.bo);
|
||||
}
|
||||
amdgpu_bo_unref(&adev->gart.bo);
|
||||
sg_free_table(sg);
|
||||
kfree(sg);
|
||||
p = virt_to_page(adev->gart.ptr);
|
||||
__free_pages(p, order);
|
||||
|
||||
adev->gart.ptr = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gart_table_vram_alloc - allocate vram for gart page table
|
||||
*
|
||||
|
@ -182,7 +319,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
|
|||
}
|
||||
mb();
|
||||
amdgpu_device_flush_hdp(adev, NULL);
|
||||
for (i = 0; i < adev->num_vmhubs; i++)
|
||||
for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
|
||||
|
||||
drm_dev_exit(idx);
|
||||
|
@ -264,7 +401,7 @@ void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
|
|||
|
||||
mb();
|
||||
amdgpu_device_flush_hdp(adev, NULL);
|
||||
for (i = 0; i < adev->num_vmhubs; i++)
|
||||
for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -51,6 +51,8 @@ struct amdgpu_gart {
|
|||
uint64_t gart_pte_flags;
|
||||
};
|
||||
|
||||
int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
|
||||
void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
|
||||
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
|
||||
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev);
|
||||
int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
|
||||
|
|
|
@ -98,7 +98,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
|
|||
int alignment, u32 initial_domain,
|
||||
u64 flags, enum ttm_bo_type type,
|
||||
struct dma_resv *resv,
|
||||
struct drm_gem_object **obj)
|
||||
struct drm_gem_object **obj, int8_t xcp_id_plus1)
|
||||
{
|
||||
struct amdgpu_bo *bo;
|
||||
struct amdgpu_bo_user *ubo;
|
||||
|
@ -116,6 +116,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
|
|||
bp.flags = flags;
|
||||
bp.domain = initial_domain;
|
||||
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
|
||||
bp.xcp_id_plus1 = xcp_id_plus1;
|
||||
|
||||
r = amdgpu_bo_create_user(adev, &bp, &ubo);
|
||||
if (r)
|
||||
|
@ -336,7 +337,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
|
|||
retry:
|
||||
r = amdgpu_gem_object_create(adev, size, args->in.alignment,
|
||||
initial_domain,
|
||||
flags, ttm_bo_type_device, resv, &gobj);
|
||||
flags, ttm_bo_type_device, resv, &gobj, fpriv->xcp_id + 1);
|
||||
if (r && r != -ERESTARTSYS) {
|
||||
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
|
||||
flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
|
||||
|
@ -379,6 +380,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
|
|||
struct ttm_operation_ctx ctx = { true, false };
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
struct drm_amdgpu_gem_userptr *args = data;
|
||||
struct amdgpu_fpriv *fpriv = filp->driver_priv;
|
||||
struct drm_gem_object *gobj;
|
||||
struct hmm_range *range;
|
||||
struct amdgpu_bo *bo;
|
||||
|
@ -405,7 +407,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
|
|||
|
||||
/* create a gem object to contain this object in */
|
||||
r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
|
||||
0, ttm_bo_type_device, NULL, &gobj);
|
||||
0, ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
@ -908,6 +910,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
|
|||
struct drm_mode_create_dumb *args)
|
||||
{
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
|
||||
struct drm_gem_object *gobj;
|
||||
uint32_t handle;
|
||||
u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
|
||||
|
@ -931,7 +934,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
|
|||
domain = amdgpu_bo_get_preferred_domain(adev,
|
||||
amdgpu_display_supported_domains(adev, flags));
|
||||
r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags,
|
||||
ttm_bo_type_device, NULL, &gobj);
|
||||
ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
|
||||
if (r)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -948,7 +951,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
|
|||
#if defined(CONFIG_DEBUG_FS)
|
||||
static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
|
||||
struct amdgpu_device *adev = m->private;
|
||||
struct drm_device *dev = adev_to_drm(adev);
|
||||
struct drm_file *file;
|
||||
int r;
|
||||
|
|
|
@ -43,8 +43,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
|
|||
int alignment, u32 initial_domain,
|
||||
u64 flags, enum ttm_bo_type type,
|
||||
struct dma_resv *resv,
|
||||
struct drm_gem_object **obj);
|
||||
|
||||
struct drm_gem_object **obj, int8_t xcp_id_plus1);
|
||||
int amdgpu_mode_dumb_create(struct drm_file *file_priv,
|
||||
struct drm_device *dev,
|
||||
struct drm_mode_create_dumb *args);
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "amdgpu_gfx.h"
|
||||
#include "amdgpu_rlc.h"
|
||||
#include "amdgpu_ras.h"
|
||||
#include "amdgpu_xcp.h"
|
||||
|
||||
/* delay 0.1 second to enable gfx off feature */
|
||||
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
|
||||
|
@ -63,10 +64,10 @@ void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
|
|||
}
|
||||
|
||||
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
|
||||
int mec, int pipe, int queue)
|
||||
int xcc_id, int mec, int pipe, int queue)
|
||||
{
|
||||
return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
|
||||
adev->gfx.mec.queue_bitmap);
|
||||
adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
|
||||
}
|
||||
|
||||
int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
|
||||
|
@ -204,29 +205,38 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
|
|||
|
||||
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
|
||||
{
|
||||
int i, queue, pipe;
|
||||
int i, j, queue, pipe;
|
||||
bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
|
||||
int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
|
||||
adev->gfx.mec.num_queue_per_pipe,
|
||||
adev->gfx.num_compute_rings);
|
||||
int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
|
||||
|
||||
if (multipipe_policy) {
|
||||
/* policy: make queues evenly cross all pipes on MEC1 only */
|
||||
for (i = 0; i < max_queues_per_mec; i++) {
|
||||
pipe = i % adev->gfx.mec.num_pipe_per_mec;
|
||||
queue = (i / adev->gfx.mec.num_pipe_per_mec) %
|
||||
adev->gfx.mec.num_queue_per_pipe;
|
||||
/* policy: make queues evenly cross all pipes on MEC1 only
|
||||
* for multiple xcc, just use the original policy for simplicity */
|
||||
for (j = 0; j < num_xcc; j++) {
|
||||
for (i = 0; i < max_queues_per_mec; i++) {
|
||||
pipe = i % adev->gfx.mec.num_pipe_per_mec;
|
||||
queue = (i / adev->gfx.mec.num_pipe_per_mec) %
|
||||
adev->gfx.mec.num_queue_per_pipe;
|
||||
|
||||
set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
|
||||
adev->gfx.mec.queue_bitmap);
|
||||
set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
|
||||
adev->gfx.mec_bitmap[j].queue_bitmap);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* policy: amdgpu owns all queues in the given pipe */
|
||||
for (i = 0; i < max_queues_per_mec; ++i)
|
||||
set_bit(i, adev->gfx.mec.queue_bitmap);
|
||||
for (j = 0; j < num_xcc; j++) {
|
||||
for (i = 0; i < max_queues_per_mec; ++i)
|
||||
set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
|
||||
}
|
||||
}
|
||||
|
||||
dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
|
||||
for (j = 0; j < num_xcc; j++) {
|
||||
dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
|
||||
bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
|
||||
}
|
||||
}
|
||||
|
||||
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
|
||||
|
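Editor's note: a compact sketch, with made-up sizes, of the per-XCC queue-acquisition policy above; the same round-robin spread over the pipes of MEC1 is now recorded in one queue bitmap per XCC.

#include <stdint.h>

#define PIPES_PER_MEC   4
#define QUEUES_PER_PIPE 8

void acquire_compute_queues(uint64_t *queue_bitmap /* one entry per XCC */,
			    int num_xcc, int max_queues)
{
	for (int xcc = 0; xcc < num_xcc; xcc++) {
		for (int i = 0; i < max_queues; i++) {
			int pipe  = i % PIPES_PER_MEC;
			int queue = (i / PIPES_PER_MEC) % QUEUES_PER_PIPE;

			queue_bitmap[xcc] |= 1ULL << (pipe * QUEUES_PER_PIPE + queue);
		}
	}
}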
@ -258,7 +268,7 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
|
|||
}
|
||||
|
||||
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring)
|
||||
struct amdgpu_ring *ring, int xcc_id)
|
||||
{
|
||||
int queue_bit;
|
||||
int mec, pipe, queue;
|
||||
|
@ -268,7 +278,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
|
|||
* adev->gfx.mec.num_queue_per_pipe;
|
||||
|
||||
while (--queue_bit >= 0) {
|
||||
if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
|
||||
if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
|
||||
continue;
|
||||
|
||||
amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
|
||||
|
@ -294,9 +304,9 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
|
|||
|
||||
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring,
|
||||
struct amdgpu_irq_src *irq)
|
||||
struct amdgpu_irq_src *irq, int xcc_id)
|
||||
{
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
|
||||
int r = 0;
|
||||
|
||||
spin_lock_init(&kiq->ring_lock);
|
||||
|
@ -304,16 +314,20 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
|
|||
ring->adev = NULL;
|
||||
ring->ring_obj = NULL;
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = adev->doorbell_index.kiq;
|
||||
ring->vm_hub = AMDGPU_GFXHUB_0;
|
||||
ring->xcc_id = xcc_id;
|
||||
ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
|
||||
ring->doorbell_index =
|
||||
(adev->doorbell_index.kiq +
|
||||
xcc_id * adev->doorbell_index.xcc_doorbell_range)
|
||||
<< 1;
|
||||
|
||||
r = amdgpu_gfx_kiq_acquire(adev, ring);
|
||||
r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
ring->eop_gpu_addr = kiq->eop_gpu_addr;
|
||||
ring->no_scheduler = true;
|
||||
sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
|
||||
sprintf(ring->name, "kiq_%d.%d.%d.%d", xcc_id, ring->me, ring->pipe, ring->queue);
|
||||
r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
|
||||
AMDGPU_RING_PRIO_DEFAULT, NULL);
|
||||
if (r)
|
||||
|
@ -327,19 +341,19 @@ void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
|
|||
amdgpu_ring_fini(ring);
|
||||
}
|
||||
|
||||
void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
|
||||
void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
|
||||
{
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
|
||||
|
||||
amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
|
||||
}
|
||||
|
||||
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
|
||||
unsigned hpd_size)
|
||||
unsigned hpd_size, int xcc_id)
|
||||
{
|
||||
int r;
|
||||
u32 *hpd;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
|
||||
|
@ -362,13 +376,18 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
|
|||
|
||||
/* create MQD for each compute/gfx queue */
|
||||
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
|
||||
unsigned mqd_size)
|
||||
unsigned mqd_size, int xcc_id)
|
||||
{
|
||||
struct amdgpu_ring *ring = NULL;
|
||||
int r, i;
|
||||
int r, i, j;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
|
||||
struct amdgpu_ring *ring = &kiq->ring;
|
||||
u32 domain = AMDGPU_GEM_DOMAIN_GTT;
|
||||
|
||||
/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
|
||||
if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
|
||||
domain |= AMDGPU_GEM_DOMAIN_VRAM;
|
||||
|
||||
/* create MQD for KIQ */
|
||||
ring = &adev->gfx.kiq.ring;
|
||||
if (!adev->enable_mes_kiq && !ring->mqd_obj) {
|
||||
/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
|
||||
* otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
|
||||
|
@ -387,8 +406,8 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
|
|||
}
|
||||
|
||||
/* prepare MQD backup */
|
||||
adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
|
||||
if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
|
||||
kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
|
||||
if (!kiq->mqd_backup)
|
||||
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
|
||||
}
|
||||
|
||||
|
@ -398,13 +417,14 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
|
|||
ring = &adev->gfx.gfx_ring[i];
|
||||
if (!ring->mqd_obj) {
|
||||
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
|
||||
domain, &ring->mqd_obj,
|
||||
&ring->mqd_gpu_addr, &ring->mqd_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
ring->mqd_size = mqd_size;
|
||||
/* prepare MQD backup */
|
||||
adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
|
||||
if (!adev->gfx.me.mqd_backup[i])
|
||||
|
@ -415,19 +435,21 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
|
|||
|
||||
/* create MQD for each KCQ */
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
j = i + xcc_id * adev->gfx.num_compute_rings;
|
||||
ring = &adev->gfx.compute_ring[j];
|
||||
if (!ring->mqd_obj) {
|
||||
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
|
||||
domain, &ring->mqd_obj,
|
||||
&ring->mqd_gpu_addr, &ring->mqd_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
ring->mqd_size = mqd_size;
|
||||
/* prepare MQD backup */
|
||||
adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
|
||||
if (!adev->gfx.mec.mqd_backup[i])
|
||||
adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
|
||||
if (!adev->gfx.mec.mqd_backup[j])
|
||||
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
|
||||
}
|
||||
}
|
||||
|
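Editor's note: with one KIQ and one set of compute rings per XCC, the MQD code above addresses the flat ring array as ring plus xcc_id times rings-per-XCC; a trivial sketch of that indexing.

int ring_slot(int ring, int xcc_id, int rings_per_xcc)
{
	/* flat index used for compute_ring[] and mqd_backup[] per partition */
	return ring + xcc_id * rings_per_xcc;
}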
@ -435,10 +457,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
|
||||
void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
|
||||
{
|
||||
struct amdgpu_ring *ring = NULL;
|
||||
int i;
|
||||
int i, j;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
|
||||
|
||||
if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
|
||||
|
@ -451,43 +474,81 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
|
|||
}
|
||||
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
kfree(adev->gfx.mec.mqd_backup[i]);
|
||||
j = i + xcc_id * adev->gfx.num_compute_rings;
|
||||
ring = &adev->gfx.compute_ring[j];
|
||||
kfree(adev->gfx.mec.mqd_backup[j]);
|
||||
amdgpu_bo_free_kernel(&ring->mqd_obj,
|
||||
&ring->mqd_gpu_addr,
|
||||
&ring->mqd_ptr);
|
||||
}
|
||||
|
||||
ring = &adev->gfx.kiq.ring;
|
||||
kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
|
||||
ring = &kiq->ring;
|
||||
kfree(kiq->mqd_backup);
|
||||
amdgpu_bo_free_kernel(&ring->mqd_obj,
|
||||
&ring->mqd_gpu_addr,
|
||||
&ring->mqd_ptr);
|
||||
}
|
||||
|
||||
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int i, r = 0;
	int j;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	spin_lock(&adev->gfx.kiq.ring_lock);
	spin_lock(&kiq->ring_lock);
	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
			      adev->gfx.num_compute_rings)) {
		spin_unlock(&adev->gfx.kiq.ring_lock);
		spin_unlock(&kiq->ring_lock);
		return -ENOMEM;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		j = i + xcc_id * adev->gfx.num_compute_rings;
		kiq->pmf->kiq_unmap_queues(kiq_ring,
					   &adev->gfx.compute_ring[j],
					   RESET_QUEUES, 0, 0);
	}

	if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang)
	if (kiq_ring->sched.ready && !adev->job_hang)
		r = amdgpu_ring_test_helper(kiq_ring);
	spin_unlock(&adev->gfx.kiq.ring_lock);
	spin_unlock(&kiq->ring_lock);

	return r;
}
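The per-partition rework above flattens each XCC's local compute-ring index into the shared ring array with j = i + xcc_id * adev->gfx.num_compute_rings. A standalone sketch of that mapping, with invented ring/XCC counts used purely for illustration:

#include <stdio.h>

int main(void)
{
	const int num_compute_rings = 8;	/* assumed per-XCC ring count */
	const int num_xcc = 4;			/* assumed number of XCC instances */

	/* Same flattening the driver uses: j = i + xcc_id * num_compute_rings */
	for (int xcc_id = 0; xcc_id < num_xcc; xcc_id++)
		for (int i = 0; i < num_compute_rings; i++)
			printf("xcc %d, local ring %d -> compute_ring[%d]\n",
			       xcc_id, i, i + xcc_id * num_compute_rings);
	return 0;
}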
|
||||
|
||||
int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
|
||||
{
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
|
||||
struct amdgpu_ring *kiq_ring = &kiq->ring;
|
||||
int i, r = 0;
|
||||
int j;
|
||||
|
||||
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock(&kiq->ring_lock);
|
||||
if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
|
||||
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
|
||||
adev->gfx.num_gfx_rings)) {
|
||||
spin_unlock(&kiq->ring_lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
|
||||
j = i + xcc_id * adev->gfx.num_gfx_rings;
|
||||
kiq->pmf->kiq_unmap_queues(kiq_ring,
|
||||
&adev->gfx.gfx_ring[j],
|
||||
PREEMPT_QUEUES, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
|
||||
r = amdgpu_ring_test_helper(kiq_ring);
|
||||
spin_unlock(&kiq->ring_lock);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -505,18 +566,18 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
|
|||
return set_resource_bit;
|
||||
}
|
||||
|
||||
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
|
||||
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
|
||||
{
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
|
||||
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
|
||||
struct amdgpu_ring *kiq_ring = &kiq->ring;
|
||||
uint64_t queue_mask = 0;
|
||||
int r, i;
|
||||
int r, i, j;
|
||||
|
||||
if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
|
||||
if (!test_bit(i, adev->gfx.mec.queue_bitmap))
|
||||
if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
|
||||
continue;
|
||||
|
||||
/* This situation may be hit in the future if a new HW
|
||||
|
@ -532,13 +593,15 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
|
|||
|
||||
DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
|
||||
kiq_ring->queue);
|
||||
spin_lock(&adev->gfx.kiq.ring_lock);
|
||||
amdgpu_device_flush_hdp(adev, NULL);
|
||||
|
||||
spin_lock(&kiq->ring_lock);
|
||||
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
|
||||
adev->gfx.num_compute_rings +
|
||||
kiq->pmf->set_resources_size);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
|
||||
spin_unlock(&adev->gfx.kiq.ring_lock);
|
||||
spin_unlock(&kiq->ring_lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -546,11 +609,51 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
|
|||
queue_mask = ~0ULL;
|
||||
|
||||
kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
j = i + xcc_id * adev->gfx.num_compute_rings;
|
||||
kiq->pmf->kiq_map_queues(kiq_ring,
|
||||
&adev->gfx.compute_ring[j]);
|
||||
}
|
||||
|
||||
r = amdgpu_ring_test_helper(kiq_ring);
|
||||
spin_unlock(&adev->gfx.kiq.ring_lock);
|
||||
spin_unlock(&kiq->ring_lock);
|
||||
if (r)
|
||||
DRM_ERROR("KCQ enable failed\n");
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
|
||||
{
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
|
||||
struct amdgpu_ring *kiq_ring = &kiq->ring;
|
||||
int r, i, j;
|
||||
|
||||
if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
|
||||
return -EINVAL;
|
||||
|
||||
amdgpu_device_flush_hdp(adev, NULL);
|
||||
|
||||
spin_lock(&kiq->ring_lock);
|
||||
/* No need to map kcq on the slave */
|
||||
if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
|
||||
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
|
||||
adev->gfx.num_gfx_rings);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
|
||||
spin_unlock(&kiq->ring_lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
|
||||
j = i + xcc_id * adev->gfx.num_gfx_rings;
|
||||
kiq->pmf->kiq_map_queues(kiq_ring,
|
||||
&adev->gfx.gfx_ring[j]);
|
||||
}
|
||||
}
|
||||
|
||||
r = amdgpu_ring_test_helper(kiq_ring);
|
||||
spin_unlock(&kiq->ring_lock);
|
||||
if (r)
|
||||
DRM_ERROR("KCQ enable failed\n");
|
||||
|
||||
|
@ -785,12 +888,31 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
|
||||
void *ras_error_status,
|
||||
void (*func)(struct amdgpu_device *adev, void *ras_error_status,
|
||||
int xcc_id))
|
||||
{
|
||||
int i;
|
||||
int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
|
||||
uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
|
||||
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
|
||||
|
||||
if (err_data) {
|
||||
err_data->ue_count = 0;
|
||||
err_data->ce_count = 0;
|
||||
}
|
||||
|
||||
for_each_inst(i, xcc_mask)
|
||||
func(adev, ras_error_status, i);
|
||||
}
|
||||
|
||||
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
|
||||
{
|
||||
signed long r, cnt = 0;
|
||||
unsigned long flags;
|
||||
uint32_t seq, reg_val_offs = 0, value = 0;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
|
||||
struct amdgpu_ring *ring = &kiq->ring;
|
||||
|
||||
if (amdgpu_device_skip_hw_access(adev))
|
||||
|
@ -858,7 +980,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
|
|||
signed long r, cnt = 0;
|
||||
unsigned long flags;
|
||||
uint32_t seq;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
|
||||
struct amdgpu_ring *ring = &kiq->ring;
|
||||
|
||||
BUG_ON(!ring->funcs->emit_wreg);
|
||||
|
@ -1062,3 +1184,125 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
|
|||
adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
{
	return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
			   adev->gfx.num_xcc_per_xcp : 1));
}
|
||||
|
||||
static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
|
||||
struct device_attribute *addr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
int mode;
|
||||
|
||||
mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
|
||||
AMDGPU_XCP_FL_NONE);
|
||||
|
||||
return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
|
||||
struct device_attribute *addr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
enum amdgpu_gfx_partition mode;
|
||||
int ret = 0, num_xcc;
|
||||
|
||||
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
|
||||
if (num_xcc % 2 != 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (!strncasecmp("SPX", buf, strlen("SPX"))) {
|
||||
mode = AMDGPU_SPX_PARTITION_MODE;
|
||||
} else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
|
||||
/*
|
||||
* DPX mode needs AIDs to be in multiple of 2.
|
||||
* Each AID connects 2 XCCs.
|
||||
*/
|
||||
if (num_xcc%4)
|
||||
return -EINVAL;
|
||||
mode = AMDGPU_DPX_PARTITION_MODE;
|
||||
} else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
|
||||
if (num_xcc != 6)
|
||||
return -EINVAL;
|
||||
mode = AMDGPU_TPX_PARTITION_MODE;
|
||||
} else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
|
||||
if (num_xcc != 8)
|
||||
return -EINVAL;
|
||||
mode = AMDGPU_QPX_PARTITION_MODE;
|
||||
} else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
|
||||
mode = AMDGPU_CPX_PARTITION_MODE;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
|
||||
struct device_attribute *addr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
char *supported_partition;
|
||||
|
||||
/* TBD */
|
||||
switch (NUM_XCC(adev->gfx.xcc_mask)) {
|
||||
case 8:
|
||||
supported_partition = "SPX, DPX, QPX, CPX";
|
||||
break;
|
||||
case 6:
|
||||
supported_partition = "SPX, TPX, CPX";
|
||||
break;
|
||||
case 4:
|
||||
supported_partition = "SPX, DPX, CPX";
|
||||
break;
|
||||
/* this seems only existing in emulation phase */
|
||||
case 2:
|
||||
supported_partition = "SPX, CPX";
|
||||
break;
|
||||
default:
|
||||
supported_partition = "Not supported";
|
||||
break;
|
||||
}
|
||||
|
||||
return sysfs_emit(buf, "%s\n", supported_partition);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR,
|
||||
amdgpu_gfx_get_current_compute_partition,
|
||||
amdgpu_gfx_set_compute_partition);
|
||||
|
||||
static DEVICE_ATTR(available_compute_partition, S_IRUGO,
|
||||
amdgpu_gfx_get_available_compute_partition, NULL);
|
||||
|
||||
int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
device_remove_file(adev->dev, &dev_attr_current_compute_partition);
|
||||
device_remove_file(adev->dev, &dev_attr_available_compute_partition);
|
||||
}
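The two DEVICE_ATTRs registered above surface the compute partition mode as plain sysfs files. A hedged userspace sketch of reading the current mode and requesting CPX follows; the card0 path is an assumption (the attributes sit under the GPU's /sys/class/drm/cardN/device/ directory) and switching requires root:

#include <stdio.h>

#define ATTR_DIR "/sys/class/drm/card0/device/"	/* assumed card index */

int main(void)
{
	char mode[32] = {0};
	FILE *f = fopen(ATTR_DIR "current_compute_partition", "r");

	if (f && fgets(mode, sizeof(mode), f))
		printf("current compute partition: %s", mode);
	if (f)
		fclose(f);

	/* Writing SPX/DPX/TPX/QPX/CPX asks the driver to switch modes. */
	f = fopen(ATTR_DIR "current_compute_partition", "w");
	if (f) {
		fputs("CPX\n", f);
		fclose(f);
	}
	return 0;
}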
|
||||
|
|
|
@@ -61,7 +61,42 @@ enum amdgpu_gfx_partition {
	AMDGPU_TPX_PARTITION_MODE = 2,
	AMDGPU_QPX_PARTITION_MODE = 3,
	AMDGPU_CPX_PARTITION_MODE = 4,
	AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE,
	AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE = -1,
	/* Automatically choose the right mode */
	AMDGPU_AUTO_COMPUTE_PARTITION_MODE = -2,
};

#define NUM_XCC(x) hweight16(x)
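NUM_XCC() simply counts the set bits in the 16-bit XCC mask via hweight16(); a userspace stand-in, shown only as an illustration of the macro's meaning:

#include <stdint.h>
#include <stdio.h>

/* Equivalent of the kernel's hweight16(): population count of a 16-bit mask. */
static unsigned int num_xcc(uint16_t xcc_mask)
{
	unsigned int n = 0;

	for (; xcc_mask; xcc_mask >>= 1)
		n += xcc_mask & 1;
	return n;
}

int main(void)
{
	printf("xcc_mask 0x3f -> %u XCCs\n", num_xcc(0x3f));	/* prints 6 */
	return 0;
}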
|
||||
|
||||
enum amdgpu_pkg_type {
|
||||
AMDGPU_PKG_TYPE_APU = 2,
|
||||
AMDGPU_PKG_TYPE_UNKNOWN,
|
||||
};
|
||||
|
||||
enum amdgpu_gfx_ras_mem_id_type {
|
||||
AMDGPU_GFX_CP_MEM = 0,
|
||||
AMDGPU_GFX_GCEA_MEM,
|
||||
AMDGPU_GFX_GC_CANE_MEM,
|
||||
AMDGPU_GFX_GCUTCL2_MEM,
|
||||
AMDGPU_GFX_GDS_MEM,
|
||||
AMDGPU_GFX_LDS_MEM,
|
||||
AMDGPU_GFX_RLC_MEM,
|
||||
AMDGPU_GFX_SP_MEM,
|
||||
AMDGPU_GFX_SPI_MEM,
|
||||
AMDGPU_GFX_SQC_MEM,
|
||||
AMDGPU_GFX_SQ_MEM,
|
||||
AMDGPU_GFX_TA_MEM,
|
||||
AMDGPU_GFX_TCC_MEM,
|
||||
AMDGPU_GFX_TCA_MEM,
|
||||
AMDGPU_GFX_TCI_MEM,
|
||||
AMDGPU_GFX_TCP_MEM,
|
||||
AMDGPU_GFX_TD_MEM,
|
||||
AMDGPU_GFX_TCX_MEM,
|
||||
AMDGPU_GFX_ATC_L2_MEM,
|
||||
AMDGPU_GFX_UTCL2_MEM,
|
||||
AMDGPU_GFX_VML2_MEM,
|
||||
AMDGPU_GFX_VML2_WALKER_MEM,
|
||||
AMDGPU_GFX_MEM_TYPE_NUM
|
||||
};
|
||||
|
||||
struct amdgpu_mec {
|
||||
|
@ -75,8 +110,10 @@ struct amdgpu_mec {
|
|||
u32 num_mec;
|
||||
u32 num_pipe_per_mec;
|
||||
u32 num_queue_per_pipe;
|
||||
void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
|
||||
void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES];
|
||||
};
|
||||
|
||||
struct amdgpu_mec_bitmap {
|
||||
/* These are the resources for which amdgpu takes ownership */
|
||||
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
|
||||
};
|
||||
|
@ -120,6 +157,7 @@ struct amdgpu_kiq {
|
|||
struct amdgpu_ring ring;
|
||||
struct amdgpu_irq_src irq;
|
||||
const struct kiq_pm4_funcs *pmf;
|
||||
void *mqd_backup;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -230,23 +268,37 @@ struct amdgpu_gfx_ras {
|
|||
struct amdgpu_iv_entry *entry);
|
||||
};
|
||||
|
||||
struct amdgpu_gfx_shadow_info {
|
||||
u32 shadow_size;
|
||||
u32 shadow_alignment;
|
||||
u32 csa_size;
|
||||
u32 csa_alignment;
|
||||
};
|
||||
|
||||
struct amdgpu_gfx_funcs {
|
||||
/* get the gpu clock counter */
|
||||
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
|
||||
void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num,
|
||||
u32 sh_num, u32 instance);
|
||||
void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd,
|
||||
u32 sh_num, u32 instance, int xcc_id);
|
||||
void (*read_wave_data)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
|
||||
uint32_t wave, uint32_t *dst, int *no_fields);
|
||||
void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd,
|
||||
void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
|
||||
uint32_t wave, uint32_t thread, uint32_t start,
|
||||
uint32_t size, uint32_t *dst);
|
||||
void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd,
|
||||
void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
|
||||
uint32_t wave, uint32_t start, uint32_t size,
|
||||
uint32_t *dst);
|
||||
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
|
||||
u32 queue, u32 vmid);
|
||||
u32 queue, u32 vmid, u32 xcc_id);
|
||||
void (*init_spm_golden)(struct amdgpu_device *adev);
|
||||
void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
|
||||
int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
|
||||
struct amdgpu_gfx_shadow_info *shadow_info);
|
||||
enum amdgpu_gfx_partition
|
||||
(*query_partition_mode)(struct amdgpu_device *adev);
|
||||
int (*switch_partition_mode)(struct amdgpu_device *adev,
|
||||
int num_xccs_per_xcp);
|
||||
int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
|
||||
};
|
||||
|
||||
struct sq_work {
|
||||
|
@ -296,7 +348,8 @@ struct amdgpu_gfx {
|
|||
struct amdgpu_ce ce;
|
||||
struct amdgpu_me me;
|
||||
struct amdgpu_mec mec;
|
||||
struct amdgpu_kiq kiq;
|
||||
struct amdgpu_mec_bitmap mec_bitmap[AMDGPU_MAX_GC_INSTANCES];
|
||||
struct amdgpu_kiq kiq[AMDGPU_MAX_GC_INSTANCES];
|
||||
struct amdgpu_imu imu;
|
||||
bool rs64_enable; /* firmware format */
|
||||
const struct firmware *me_fw; /* ME firmware */
|
||||
|
@ -376,15 +429,31 @@ struct amdgpu_gfx {
|
|||
struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
|
||||
struct amdgpu_ring_mux muxer;
|
||||
|
||||
enum amdgpu_gfx_partition partition_mode;
|
||||
uint32_t num_xcd;
|
||||
bool cp_gfx_shadow; /* for gfx11 */
|
||||
|
||||
uint16_t xcc_mask;
|
||||
uint32_t num_xcc_per_xcp;
|
||||
struct mutex partition_mutex;
|
||||
};
|
||||
|
||||
struct amdgpu_gfx_ras_reg_entry {
|
||||
struct amdgpu_ras_err_status_reg_entry reg_entry;
|
||||
enum amdgpu_gfx_ras_mem_id_type mem_id_type;
|
||||
uint32_t se_num;
|
||||
};
|
||||
|
||||
struct amdgpu_gfx_ras_mem_id_entry {
|
||||
const struct amdgpu_ras_memory_id_entry *mem_id_ent;
|
||||
uint32_t size;
|
||||
};
|
||||
|
||||
#define AMDGPU_GFX_MEMID_ENT(x) {(x), ARRAY_SIZE(x)},
|
||||
|
||||
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
|
||||
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
|
||||
#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid))
|
||||
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id)))
|
||||
#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id)))
|
||||
#define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev))
|
||||
#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si)))
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_create_bitmask - create a bitmask
|
||||
|
@ -404,19 +473,21 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
|
|||
|
||||
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring,
|
||||
struct amdgpu_irq_src *irq);
|
||||
struct amdgpu_irq_src *irq, int xcc_id);
|
||||
|
||||
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
|
||||
|
||||
void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id);
|
||||
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
|
||||
unsigned hpd_size);
|
||||
unsigned hpd_size, int xcc_id);
|
||||
|
||||
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
|
||||
unsigned mqd_size);
|
||||
void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev);
|
||||
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev);
|
||||
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev);
|
||||
unsigned mqd_size, int xcc_id);
|
||||
void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id);
|
||||
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id);
|
||||
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id);
|
||||
int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id);
|
||||
int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id);
|
||||
|
||||
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev);
|
||||
|
@ -425,8 +496,8 @@ int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
|
|||
int pipe, int queue);
|
||||
void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
|
||||
int *mec, int *pipe, int *queue);
|
||||
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
|
||||
int pipe, int queue);
|
||||
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int xcc_id,
|
||||
int mec, int pipe, int queue);
|
||||
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring);
|
||||
bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
|
||||
|
@ -458,4 +529,33 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id)
|
|||
int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev);
|
||||
int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
|
||||
bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id);
|
||||
int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
|
||||
void *ras_error_status,
|
||||
void (*func)(struct amdgpu_device *adev, void *ras_error_status,
|
||||
int xcc_id));
|
||||
|
||||
static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
|
||||
{
|
||||
switch (mode) {
|
||||
case AMDGPU_SPX_PARTITION_MODE:
|
||||
return "SPX";
|
||||
case AMDGPU_DPX_PARTITION_MODE:
|
||||
return "DPX";
|
||||
case AMDGPU_TPX_PARTITION_MODE:
|
||||
return "TPX";
|
||||
case AMDGPU_QPX_PARTITION_MODE:
|
||||
return "QPX";
|
||||
case AMDGPU_CPX_PARTITION_MODE:
|
||||
return "CPX";
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
|
||||
return "UNKNOWN";
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -534,22 +534,21 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
|
|||
* subject to change when ring number changes
|
||||
* Engine 17: Gart flushes
|
||||
*/
|
||||
#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
|
||||
#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
|
||||
#define AMDGPU_VMHUB_INV_ENG_BITMAP 0x1FFF3
|
||||
|
||||
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ring *ring;
|
||||
unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
|
||||
{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
|
||||
GFXHUB_FREE_VM_INV_ENGS_BITMAP};
|
||||
unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
|
||||
unsigned i;
|
||||
unsigned vmhub, inv_eng;
|
||||
|
||||
if (adev->enable_mes) {
|
||||
/* init the vm inv eng for all vmhubs */
|
||||
for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
|
||||
vm_inv_engs[i] = AMDGPU_VMHUB_INV_ENG_BITMAP;
|
||||
/* reserve engine 5 for firmware */
|
||||
for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++)
|
||||
vm_inv_engs[vmhub] &= ~(1 << 5);
|
||||
if (adev->enable_mes)
|
||||
vm_inv_engs[i] &= ~(1 << 5);
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->num_rings; ++i) {
|
||||
|
@ -593,6 +592,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
|
|||
case IP_VERSION(9, 3, 0):
|
||||
/* GC 10.3.7 */
|
||||
case IP_VERSION(10, 3, 7):
|
||||
/* GC 11.0.1 */
|
||||
case IP_VERSION(11, 0, 1):
|
||||
if (amdgpu_tmz == 0) {
|
||||
adev->gmc.tmz_enabled = false;
|
||||
dev_info(adev->dev,
|
||||
|
@ -616,7 +617,6 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
|
|||
case IP_VERSION(10, 3, 1):
|
||||
/* YELLOW_CARP*/
|
||||
case IP_VERSION(10, 3, 3):
|
||||
case IP_VERSION(11, 0, 1):
|
||||
case IP_VERSION(11, 0, 4):
|
||||
/* Don't enable it by default yet.
|
||||
*/
|
||||
|
@ -670,7 +670,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
|
|||
for (i = 0; i < 16; i++) {
|
||||
reg = hub->vm_context0_cntl + hub->ctx_distance * i;
|
||||
|
||||
tmp = (hub_type == AMDGPU_GFXHUB_0) ?
|
||||
tmp = (hub_type == AMDGPU_GFXHUB(0)) ?
|
||||
RREG32_SOC15_IP(GC, reg) :
|
||||
RREG32_SOC15_IP(MMHUB, reg);
|
||||
|
||||
|
@ -679,7 +679,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
|
|||
else
|
||||
tmp &= ~hub->vm_cntx_cntl_vm_fault;
|
||||
|
||||
(hub_type == AMDGPU_GFXHUB_0) ?
|
||||
(hub_type == AMDGPU_GFXHUB(0)) ?
|
||||
WREG32_SOC15_IP(GC, reg, tmp) :
|
||||
WREG32_SOC15_IP(MMHUB, reg, tmp);
|
||||
}
|
||||
|
@ -892,3 +892,47 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t current_memory_partition_show(
|
||||
struct device *dev, struct device_attribute *addr, char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
enum amdgpu_memory_partition mode;
|
||||
|
||||
mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
|
||||
switch (mode) {
|
||||
case AMDGPU_NPS1_PARTITION_MODE:
|
||||
return sysfs_emit(buf, "NPS1\n");
|
||||
case AMDGPU_NPS2_PARTITION_MODE:
|
||||
return sysfs_emit(buf, "NPS2\n");
|
||||
case AMDGPU_NPS3_PARTITION_MODE:
|
||||
return sysfs_emit(buf, "NPS3\n");
|
||||
case AMDGPU_NPS4_PARTITION_MODE:
|
||||
return sysfs_emit(buf, "NPS4\n");
|
||||
case AMDGPU_NPS6_PARTITION_MODE:
|
||||
return sysfs_emit(buf, "NPS6\n");
|
||||
case AMDGPU_NPS8_PARTITION_MODE:
|
||||
return sysfs_emit(buf, "NPS8\n");
|
||||
default:
|
||||
return sysfs_emit(buf, "UNKNOWN\n");
|
||||
}
|
||||
|
||||
return sysfs_emit(buf, "UNKNOWN\n");
|
||||
}
|
||||
|
||||
static DEVICE_ATTR_RO(current_memory_partition);
|
||||
|
||||
int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev)
|
||||
{
|
||||
if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
|
||||
return 0;
|
||||
|
||||
return device_create_file(adev->dev,
|
||||
&dev_attr_current_memory_partition);
|
||||
}
|
||||
|
||||
void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
device_remove_file(adev->dev, &dev_attr_current_memory_partition);
|
||||
}
|
||||
|
|
|
@ -63,6 +63,16 @@
|
|||
|
||||
struct firmware;
|
||||
|
||||
enum amdgpu_memory_partition {
	UNKNOWN_MEMORY_PARTITION_MODE = 0,
	AMDGPU_NPS1_PARTITION_MODE = 1,
	AMDGPU_NPS2_PARTITION_MODE = 2,
	AMDGPU_NPS3_PARTITION_MODE = 3,
	AMDGPU_NPS4_PARTITION_MODE = 4,
	AMDGPU_NPS6_PARTITION_MODE = 6,
	AMDGPU_NPS8_PARTITION_MODE = 8,
};
|
||||
|
||||
/*
|
||||
* GMC page fault information
|
||||
*/
|
||||
|
@ -119,7 +129,8 @@ struct amdgpu_gmc_funcs {
|
|||
uint32_t vmhub, uint32_t flush_type);
|
||||
/* flush the vm tlb via pasid */
|
||||
int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
|
||||
uint32_t flush_type, bool all_hub);
|
||||
uint32_t flush_type, bool all_hub,
|
||||
uint32_t inst);
|
||||
/* flush the vm tlb via ring */
|
||||
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
|
||||
uint64_t pd_addr);
|
||||
|
@ -137,8 +148,15 @@ struct amdgpu_gmc_funcs {
|
|||
void (*get_vm_pte)(struct amdgpu_device *adev,
|
||||
struct amdgpu_bo_va_mapping *mapping,
|
||||
uint64_t *flags);
|
||||
/* override per-page pte flags */
|
||||
void (*override_vm_pte_flags)(struct amdgpu_device *dev,
|
||||
struct amdgpu_vm *vm,
|
||||
uint64_t addr, uint64_t *flags);
|
||||
/* get the amount of memory used by the vbios for pre-OS console */
|
||||
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
|
||||
|
||||
enum amdgpu_memory_partition (*query_mem_partition_mode)(
|
||||
struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
struct amdgpu_xgmi_ras {
|
||||
|
@ -164,6 +182,21 @@ struct amdgpu_xgmi {
|
|||
struct amdgpu_xgmi_ras *ras;
|
||||
};
|
||||
|
||||
struct amdgpu_mem_partition_info {
	union {
		struct {
			uint32_t fpfn;
			uint32_t lpfn;
		} range;
		struct {
			int node;
		} numa;
	};
	uint64_t size;
};

#define INVALID_PFN -1
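The range member stores an inclusive lpfn, while TTM placements take an exclusive upper bound, which is why the buffer-placement change later in this series passes lpfn + 1. A standalone illustration with invented pfn values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Invented partition covering pfns [0x0, 0x3ffff] (inclusive). */
	uint32_t fpfn = 0x0, lpfn = 0x3ffff;

	/* TTM places use an exclusive end, so the driver hands TTM lpfn + 1. */
	printf("partition [%#x, %#x] -> TTM place fpfn=%#x lpfn=%#x\n",
	       fpfn, lpfn, fpfn, lpfn + 1);
	return 0;
}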
|
||||
|
||||
struct amdgpu_gmc {
|
||||
/* FB's physical address in MMIO space (for CPU to
|
||||
* map FB). This is different compared to the agp/
|
||||
|
@ -250,7 +283,10 @@ struct amdgpu_gmc {
|
|||
uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER;
|
||||
|
||||
bool tmz_enabled;
|
||||
bool is_app_apu;
|
||||
|
||||
struct amdgpu_mem_partition_info *mem_partitions;
|
||||
uint8_t num_mem_partitions;
|
||||
const struct amdgpu_gmc_funcs *gmc_funcs;
|
||||
|
||||
struct amdgpu_xgmi xgmi;
|
||||
|
@ -296,14 +332,17 @@ struct amdgpu_gmc {
|
|||
};
|
||||
|
||||
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
|
||||
#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
|
||||
#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
|
||||
((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
|
||||
((adev), (pasid), (type), (allhub)))
|
||||
((adev), (pasid), (type), (allhub), (inst)))
|
||||
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
|
||||
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
|
||||
#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
|
||||
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
|
||||
#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
|
||||
#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \
|
||||
(adev)->gmc.gmc_funcs->override_vm_pte_flags \
|
||||
((adev), (vm), (addr), (pte_flags))
|
||||
#define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
|
||||
|
||||
/**
|
||||
|
@ -373,4 +412,7 @@ uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr);
|
|||
uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
|
||||
uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
|
||||
int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
|
||||
int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev);
|
||||
void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -136,7 +136,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
|||
uint64_t fence_ctx;
|
||||
uint32_t status = 0, alloc_size;
|
||||
unsigned fence_flags = 0;
|
||||
bool secure;
|
||||
bool secure, init_shadow;
|
||||
u64 shadow_va, csa_va, gds_va;
|
||||
int vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
|
||||
unsigned i;
|
||||
int r = 0;
|
||||
|
@ -150,9 +152,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
|||
vm = job->vm;
|
||||
fence_ctx = job->base.s_fence ?
|
||||
job->base.s_fence->scheduled.context : 0;
|
||||
shadow_va = job->shadow_va;
|
||||
csa_va = job->csa_va;
|
||||
gds_va = job->gds_va;
|
||||
init_shadow = job->init_shadow;
|
||||
} else {
|
||||
vm = NULL;
|
||||
fence_ctx = 0;
|
||||
shadow_va = 0;
|
||||
csa_va = 0;
|
||||
gds_va = 0;
|
||||
init_shadow = false;
|
||||
}
|
||||
|
||||
if (!ring->sched.ready && !ring->is_mes_queue) {
|
||||
|
@ -212,7 +222,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
|||
}
|
||||
|
||||
amdgpu_ring_ib_begin(ring);
|
||||
if (job && ring->funcs->init_cond_exec)
|
||||
|
||||
if (ring->funcs->emit_gfx_shadow)
|
||||
amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
|
||||
init_shadow, vmid);
|
||||
|
||||
if (ring->funcs->init_cond_exec)
|
||||
patch_offset = amdgpu_ring_init_cond_exec(ring);
|
||||
|
||||
amdgpu_device_flush_hdp(adev, ring);
|
||||
|
@ -263,6 +278,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
|||
fence_flags | AMDGPU_FENCE_FLAG_64BIT);
|
||||
}
|
||||
|
||||
if (ring->funcs->emit_gfx_shadow) {
|
||||
amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
|
||||
|
||||
if (ring->funcs->init_cond_exec) {
|
||||
unsigned ce_offset = ~0;
|
||||
|
||||
ce_offset = amdgpu_ring_init_cond_exec(ring);
|
||||
if (ce_offset != ~0 && ring->funcs->patch_cond_exec)
|
||||
amdgpu_ring_patch_cond_exec(ring, ce_offset);
|
||||
}
|
||||
}
|
||||
|
||||
r = amdgpu_fence_emit(ring, f, job, fence_flags);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
|
||||
|
@ -436,7 +463,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
|
|||
|
||||
static int amdgpu_debugfs_sa_info_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
|
||||
struct amdgpu_device *adev = m->private;
|
||||
|
||||
seq_printf(m, "--------------------- DELAYED --------------------- \n");
|
||||
amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED],
|
||||
|
|
|
@ -409,7 +409,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
|
|||
if (r || !idle)
|
||||
goto error;
|
||||
|
||||
if (vm->reserved_vmid[vmhub]) {
|
||||
if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) {
|
||||
r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
|
||||
if (r || !id)
|
||||
goto error;
|
||||
|
@ -460,14 +460,11 @@ error:
|
|||
}
|
||||
|
||||
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm,
|
||||
unsigned vmhub)
|
||||
{
|
||||
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
|
||||
|
||||
mutex_lock(&id_mgr->lock);
|
||||
if (vm->reserved_vmid[vmhub])
|
||||
goto unlock;
|
||||
|
||||
++id_mgr->reserved_use_count;
|
||||
if (!id_mgr->reserved) {
|
||||
|
@ -479,27 +476,23 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
|
|||
list_del_init(&id->list);
|
||||
id_mgr->reserved = id;
|
||||
}
|
||||
vm->reserved_vmid[vmhub] = true;
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&id_mgr->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm,
|
||||
unsigned vmhub)
|
||||
{
|
||||
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
|
||||
|
||||
mutex_lock(&id_mgr->lock);
|
||||
if (vm->reserved_vmid[vmhub] &&
|
||||
!--id_mgr->reserved_use_count) {
|
||||
if (!--id_mgr->reserved_use_count) {
|
||||
/* give the reserved ID back to normal round robin */
|
||||
list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
|
||||
id_mgr->reserved = NULL;
|
||||
}
|
||||
vm->reserved_vmid[vmhub] = false;
|
||||
|
||||
mutex_unlock(&id_mgr->lock);
|
||||
}
|
||||
|
||||
|
@ -578,6 +571,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
|
|||
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
|
||||
}
|
||||
}
|
||||
/* alloc a default reserved vmid to enforce isolation */
|
||||
if (enforce_isolation)
|
||||
amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -79,11 +79,9 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
|
|||
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
|
||||
struct amdgpu_vmid *id);
|
||||
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm,
|
||||
unsigned vmhub);
|
||||
unsigned vmhub);
|
||||
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm,
|
||||
unsigned vmhub);
|
||||
unsigned vmhub);
|
||||
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job, struct dma_fence **fence);
|
||||
void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
|
||||
|
|
|
@ -270,7 +270,7 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
|
|||
entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
|
||||
entry->timestamp_src = dw[2] >> 31;
|
||||
entry->pasid = dw[3] & 0xffff;
|
||||
entry->pasid_src = dw[3] >> 31;
|
||||
entry->node_id = (dw[3] >> 16) & 0xff;
|
||||
entry->src_data[0] = dw[4];
|
||||
entry->src_data[1] = dw[5];
|
||||
entry->src_data[2] = dw[6];
|
||||
|
|
|
@ -99,6 +99,21 @@ const char *soc15_ih_clientid_name[] = {
|
|||
"MP1"
|
||||
};
|
||||
|
||||
const int node_id_to_phys_map[NODEID_MAX] = {
	[AID0_NODEID] = 0,
	[XCD0_NODEID] = 0,
	[XCD1_NODEID] = 1,
	[AID1_NODEID] = 1,
	[XCD2_NODEID] = 2,
	[XCD3_NODEID] = 3,
	[AID2_NODEID] = 2,
	[XCD4_NODEID] = 4,
	[XCD5_NODEID] = 5,
	[AID3_NODEID] = 3,
	[XCD6_NODEID] = 6,
	[XCD7_NODEID] = 7,
};
|
||||
|
||||
/**
|
||||
* amdgpu_irq_disable_all - disable *all* interrupts
|
||||
*
|
||||
|
@ -109,7 +124,7 @@ const char *soc15_ih_clientid_name[] = {
|
|||
void amdgpu_irq_disable_all(struct amdgpu_device *adev)
|
||||
{
|
||||
unsigned long irqflags;
|
||||
unsigned i, j, k;
|
||||
unsigned int i, j, k;
|
||||
int r;
|
||||
|
||||
spin_lock_irqsave(&adev->irq.lock, irqflags);
|
||||
|
@ -124,7 +139,6 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
|
|||
continue;
|
||||
|
||||
for (k = 0; k < src->num_types; ++k) {
|
||||
atomic_set(&src->enabled_types[k], 0);
|
||||
r = src->funcs->set(adev, src, k,
|
||||
AMDGPU_IRQ_STATE_DISABLE);
|
||||
if (r)
|
||||
|
@ -268,11 +282,11 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
|
|||
int nvec = pci_msix_vec_count(adev->pdev);
|
||||
unsigned int flags;
|
||||
|
||||
if (nvec <= 0) {
|
||||
if (nvec <= 0)
|
||||
flags = PCI_IRQ_MSI;
|
||||
} else {
|
||||
else
|
||||
flags = PCI_IRQ_MSI | PCI_IRQ_MSIX;
|
||||
}
|
||||
|
||||
/* we only need one vector */
|
||||
nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
|
||||
if (nvec > 0) {
|
||||
|
@ -331,7 +345,7 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
|
|||
*/
|
||||
void amdgpu_irq_fini_sw(struct amdgpu_device *adev)
|
||||
{
|
||||
unsigned i, j;
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
|
||||
if (!adev->irq.client[i].sources)
|
||||
|
@ -365,7 +379,7 @@ void amdgpu_irq_fini_sw(struct amdgpu_device *adev)
|
|||
* 0 on success or error code otherwise
|
||||
*/
|
||||
int amdgpu_irq_add_id(struct amdgpu_device *adev,
|
||||
unsigned client_id, unsigned src_id,
|
||||
unsigned int client_id, unsigned int src_id,
|
||||
struct amdgpu_irq_src *source)
|
||||
{
|
||||
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX)
|
||||
|
@ -417,7 +431,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
|
|||
{
|
||||
u32 ring_index = ih->rptr >> 2;
|
||||
struct amdgpu_iv_entry entry;
|
||||
unsigned client_id, src_id;
|
||||
unsigned int client_id, src_id;
|
||||
struct amdgpu_irq_src *src;
|
||||
bool handled = false;
|
||||
int r;
|
||||
|
@ -492,7 +506,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
|
|||
* Updates interrupt state for the specific source (all ASICs).
|
||||
*/
|
||||
int amdgpu_irq_update(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *src, unsigned type)
|
||||
struct amdgpu_irq_src *src, unsigned int type)
|
||||
{
|
||||
unsigned long irqflags;
|
||||
enum amdgpu_interrupt_state state;
|
||||
|
@ -501,7 +515,8 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
|
|||
spin_lock_irqsave(&adev->irq.lock, irqflags);
|
||||
|
||||
/* We need to determine after taking the lock, otherwise
|
||||
we might disable just enabled interrupts again */
|
||||
* we might disable just enabled interrupts again
|
||||
*/
|
||||
if (amdgpu_irq_enabled(adev, src, type))
|
||||
state = AMDGPU_IRQ_STATE_ENABLE;
|
||||
else
|
||||
|
@ -555,7 +570,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
|
|||
* 0 on success or error code otherwise
|
||||
*/
|
||||
int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
|
||||
unsigned type)
|
||||
unsigned int type)
|
||||
{
|
||||
if (!adev->irq.installed)
|
||||
return -ENOENT;
|
||||
|
@ -585,7 +600,7 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
|
|||
* 0 on success or error code otherwise
|
||||
*/
|
||||
int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
|
||||
unsigned type)
|
||||
unsigned int type)
|
||||
{
|
||||
if (!adev->irq.installed)
|
||||
return -ENOENT;
|
||||
|
@ -619,7 +634,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
|
|||
* invalid parameters
|
||||
*/
|
||||
bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
|
||||
unsigned type)
|
||||
unsigned int type)
|
||||
{
|
||||
if (!adev->irq.installed)
|
||||
return false;
|
||||
|
@ -732,7 +747,7 @@ void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
|
|||
* Returns:
|
||||
* Linux IRQ
|
||||
*/
|
||||
unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id)
|
||||
unsigned int amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned int src_id)
|
||||
{
|
||||
adev->irq.virq[src_id] = irq_create_mapping(adev->irq.domain, src_id);
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ struct amdgpu_iv_entry {
|
|||
uint64_t timestamp;
|
||||
unsigned timestamp_src;
|
||||
unsigned pasid;
|
||||
unsigned pasid_src;
|
||||
unsigned node_id;
|
||||
unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW];
|
||||
const uint32_t *iv_entry;
|
||||
};
|
||||
|
@ -102,6 +102,24 @@ struct amdgpu_irq {
|
|||
bool retry_cam_enabled;
|
||||
};
|
||||
|
||||
enum interrupt_node_id_per_aid {
	AID0_NODEID = 0,
	XCD0_NODEID = 1,
	XCD1_NODEID = 2,
	AID1_NODEID = 4,
	XCD2_NODEID = 5,
	XCD3_NODEID = 6,
	AID2_NODEID = 8,
	XCD4_NODEID = 9,
	XCD5_NODEID = 10,
	AID3_NODEID = 12,
	XCD6_NODEID = 13,
	XCD7_NODEID = 14,
	NODEID_MAX,
};

extern const int node_id_to_phys_map[NODEID_MAX];
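These sparse IH node IDs (with gaps at 3, 7 and 11) are translated to dense physical AID/XCD indices by node_id_to_phys_map[] shown earlier. A standalone sketch of the same lookup; marking the unused gaps as -1 is my own convention for clarity, the kernel table simply leaves them zero:

#include <stdio.h>

static int node_to_phys(int node_id)
{
	/* Mirrors node_id_to_phys_map[] from this patch; gaps flagged as -1. */
	static const int map[15] = { 0, 0, 1, -1, 1, 2, 3, -1,
				     2, 4, 5, -1, 3, 6, 7 };

	return (node_id >= 0 && node_id < 15) ? map[node_id] : -1;
}

int main(void)
{
	printf("IH node 10 (XCD5) -> physical instance %d\n", node_to_phys(10));
	return 0;
}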
|
||||
|
||||
void amdgpu_irq_disable_all(struct amdgpu_device *adev);
|
||||
|
||||
int amdgpu_irq_init(struct amdgpu_device *adev);
|
||||
|
|
|
@ -65,6 +65,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
|
|||
DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
|
||||
ti.process_name, ti.tgid, ti.task_name, ti.pid);
|
||||
|
||||
dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
|
||||
|
||||
if (amdgpu_device_should_recover_gpu(ring->adev)) {
|
||||
struct amdgpu_reset_context reset_context;
|
||||
memset(&reset_context, 0, sizeof(reset_context));
|
||||
|
|
|
@ -67,6 +67,12 @@ struct amdgpu_job {
|
|||
uint64_t uf_addr;
|
||||
uint64_t uf_sequence;
|
||||
|
||||
/* virtual addresses for shadow/GDS/CSA */
|
||||
uint64_t shadow_va;
|
||||
uint64_t csa_va;
|
||||
uint64_t gds_va;
|
||||
bool init_shadow;
|
||||
|
||||
/* job_run_counter >= 1 means a resubmit job */
|
||||
uint32_t job_run_counter;
|
||||
|
||||
|
|
|
@ -45,13 +45,14 @@ int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
|
|||
|
||||
int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
|
||||
if (adev->jpeg.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec);
|
||||
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
|
||||
amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]);
|
||||
}
|
||||
|
||||
mutex_destroy(&adev->jpeg.jpeg_pg_lock);
|
||||
|
@ -76,13 +77,14 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
|
|||
struct amdgpu_device *adev =
|
||||
container_of(work, struct amdgpu_device, jpeg.idle_work.work);
|
||||
unsigned int fences = 0;
|
||||
unsigned int i;
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
|
||||
if (adev->jpeg.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec);
|
||||
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
|
||||
fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec[j]);
|
||||
}
|
||||
|
||||
if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt))
|
||||
|
@ -122,18 +124,21 @@ int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring)
|
|||
if (amdgpu_sriov_vf(adev))
|
||||
return 0;
|
||||
|
||||
WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch, 0));
|
||||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe], 0xCAFEDEAD);
|
||||
/* Add a read register to make sure the write register is executed. */
|
||||
RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
|
||||
|
||||
amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0));
|
||||
amdgpu_ring_write(ring, 0xABADCAFE);
|
||||
amdgpu_ring_commit(ring);
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
|
||||
if (tmp == 0xABADCAFE)
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
|
@ -161,8 +166,7 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
|
|||
|
||||
ib = &job->ibs[0];
|
||||
|
||||
ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0,
|
||||
PACKETJ_TYPE0);
|
||||
ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0, 0, PACKETJ_TYPE0);
|
||||
ib->ptr[1] = 0xDEADBEEF;
|
||||
for (i = 2; i < 16; i += 2) {
|
||||
ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
|
||||
|
@ -208,7 +212,7 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|||
}
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
|
||||
tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
break;
|
||||
udelay(1);
|
||||
|
@ -241,6 +245,31 @@ int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
|
||||
{
|
||||
int r, i;
|
||||
|
||||
r = amdgpu_ras_block_late_init(adev, ras_block);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (amdgpu_ras_is_supported(adev, ras_block->block)) {
|
||||
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
|
||||
if (adev->jpeg.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
r = amdgpu_irq_get(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
|
||||
if (r)
|
||||
goto late_fini;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
late_fini:
|
||||
amdgpu_ras_block_late_fini(adev, ras_block);
|
||||
return r;
|
||||
}
|
||||
|
||||
int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int err;
|
||||
|
@ -262,7 +291,7 @@ int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev)
|
|||
adev->jpeg.ras_if = &ras->ras_block.ras_comm;
|
||||
|
||||
if (!ras->ras_block.ras_late_init)
|
||||
ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
|
||||
ras->ras_block.ras_late_init = amdgpu_jpeg_ras_late_init;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -26,19 +26,22 @@
|
|||
|
||||
#include "amdgpu_ras.h"
|
||||
|
||||
#define AMDGPU_MAX_JPEG_INSTANCES 2
|
||||
#define AMDGPU_MAX_JPEG_INSTANCES 4
|
||||
#define AMDGPU_MAX_JPEG_RINGS 8
|
||||
|
||||
#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
|
||||
#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
|
||||
|
||||
struct amdgpu_jpeg_reg{
|
||||
unsigned jpeg_pitch;
|
||||
unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS];
|
||||
};
|
||||
|
||||
struct amdgpu_jpeg_inst {
|
||||
struct amdgpu_ring ring_dec;
|
||||
struct amdgpu_ring ring_dec[AMDGPU_MAX_JPEG_RINGS];
|
||||
struct amdgpu_irq_src irq;
|
||||
struct amdgpu_irq_src ras_poison_irq;
|
||||
struct amdgpu_jpeg_reg external;
|
||||
uint8_t aid_id;
|
||||
};
|
||||
|
||||
struct amdgpu_jpeg_ras {
|
||||
|
@ -48,6 +51,7 @@ struct amdgpu_jpeg_ras {
|
|||
struct amdgpu_jpeg {
|
||||
uint8_t num_jpeg_inst;
|
||||
struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
|
||||
unsigned num_jpeg_rings;
|
||||
struct amdgpu_jpeg_reg internal;
|
||||
unsigned harvest_config;
|
||||
struct delayed_work idle_work;
|
||||
|
@ -56,6 +60,9 @@ struct amdgpu_jpeg {
|
|||
atomic_t total_submission_cnt;
|
||||
struct ras_common_if *ras_if;
|
||||
struct amdgpu_jpeg_ras *ras;
|
||||
|
||||
uint16_t inst_mask;
|
||||
uint8_t num_inst_per_aid;
|
||||
};
|
||||
|
||||
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
|
||||
|
@ -72,6 +79,8 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
|
|||
int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev,
|
||||
struct ras_common_if *ras_block);
|
||||
int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev);
|
||||
|
||||
#endif /*__AMDGPU_JPEG_H__*/
|
||||
|
|
|
@ -462,8 +462,9 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
|||
if (adev->jpeg.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
if (adev->jpeg.inst[i].ring_dec.sched.ready)
|
||||
++num_rings;
|
||||
for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
|
||||
if (adev->jpeg.inst[i].ring_dec[j].sched.ready)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 16;
|
||||
ib_size_alignment = 16;
|
||||
|
@ -876,6 +877,19 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
|||
dev_info->gl2c_cache_size = adev->gfx.config.gc_gl2c_per_gpu;
|
||||
dev_info->mall_size = adev->gmc.mall_size;
|
||||
|
||||
|
||||
if (adev->gfx.funcs->get_gfx_shadow_info) {
|
||||
struct amdgpu_gfx_shadow_info shadow_info;
|
||||
|
||||
ret = amdgpu_gfx_get_gfx_shadow_info(adev, &shadow_info);
|
||||
if (!ret) {
|
||||
dev_info->shadow_size = shadow_info.shadow_size;
|
||||
dev_info->shadow_alignment = shadow_info.shadow_alignment;
|
||||
dev_info->csa_size = shadow_info.csa_size;
|
||||
dev_info->csa_alignment = shadow_info.csa_alignment;
|
||||
}
|
||||
}
|
||||
|
||||
ret = copy_to_user(out, dev_info,
|
||||
min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
|
||||
kfree(dev_info);
|
||||
|
@ -1140,6 +1154,15 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
|||
kfree(caps);
|
||||
return r;
|
||||
}
|
||||
case AMDGPU_INFO_MAX_IBS: {
|
||||
uint32_t max_ibs[AMDGPU_HW_IP_NUM];
|
||||
|
||||
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
|
||||
max_ibs[i] = amdgpu_ring_max_ibs(i);
|
||||
|
||||
return copy_to_user(out, max_ibs,
|
||||
min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0;
|
||||
}
|
||||
default:
|
||||
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
|
||||
return -EINVAL;
|
||||
|
@ -1210,6 +1233,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
|
|||
if (r)
|
||||
goto error_pasid;
|
||||
|
||||
r = amdgpu_xcp_open_device(adev, fpriv, file_priv);
|
||||
if (r)
|
||||
goto error_vm;
|
||||
|
||||
r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
|
||||
if (r)
|
||||
goto error_vm;
|
||||
|
@ -1284,12 +1311,12 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
|
|||
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
|
||||
amdgpu_vce_free_handles(adev, file_priv);
|
||||
|
||||
if (amdgpu_mcbp) {
|
||||
/* TODO: how to handle reserve failure */
|
||||
BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
|
||||
amdgpu_vm_bo_del(adev, fpriv->csa_va);
|
||||
if (fpriv->csa_va) {
|
||||
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
|
||||
|
||||
WARN_ON(amdgpu_unmap_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
|
||||
fpriv->csa_va, csa_addr));
|
||||
fpriv->csa_va = NULL;
|
||||
amdgpu_bo_unreserve(adev->virt.csa_obj);
|
||||
}
|
||||
|
||||
pasid = fpriv->vm.pasid;
|
||||
|
@ -1441,7 +1468,7 @@ void amdgpu_disable_vblank_kms(struct drm_crtc *crtc)
|
|||
|
||||
static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
|
||||
struct amdgpu_device *adev = m->private;
|
||||
struct drm_amdgpu_info_firmware fw_info;
|
||||
struct drm_amdgpu_query_fw query_fw;
|
||||
struct atom_context *ctx = adev->mode_info.atom_context;
|
||||
|
@ -1449,7 +1476,7 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
|
|||
int ret, i;
|
||||
|
||||
static const char *ta_fw_name[TA_FW_TYPE_MAX_INDEX] = {
|
||||
#define TA_FW_NAME(type) [TA_FW_TYPE_PSP_##type] = #type
|
||||
#define TA_FW_NAME(type)[TA_FW_TYPE_PSP_##type] = #type
|
||||
TA_FW_NAME(XGMI),
|
||||
TA_FW_NAME(RAS),
|
||||
TA_FW_NAME(HDCP),
|
||||
|
@ -1548,7 +1575,7 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
|
|||
fw_info.feature, fw_info.ver);
|
||||
|
||||
/* RLCV */
|
||||
query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV;
|
||||
query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV;
|
||||
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
|
|
@ -924,6 +924,43 @@ error:
|
|||
return r;
|
||||
}
|
||||
|
||||
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
				uint64_t process_context_addr,
				uint32_t spi_gdbg_per_vmid_cntl,
				const uint32_t *tcp_watch_cntl,
				uint32_t flags,
				bool trap_en)
{
	struct mes_misc_op_input op_input = {0};
	int r;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes set shader debugger is not supported!\n");
		return -EINVAL;
	}

	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
	op_input.set_shader_debugger.process_context_addr = process_context_addr;
	op_input.set_shader_debugger.flags.u32all = flags;
	op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
	memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
			sizeof(op_input.set_shader_debugger.tcp_watch_cntl));

	if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
			AMDGPU_MES_API_VERSION_SHIFT) >= 14)
		op_input.set_shader_debugger.trap_en = trap_en;

	amdgpu_mes_lock(&adev->mes);

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to set_shader_debugger\n");

	amdgpu_mes_unlock(&adev->mes);

	return r;
}
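A hedged call-site sketch for the new helper, in the shape the KFD debugger work elsewhere in this merge would use it; the context address, watch settings and flags below are placeholders, not real register programming:

/* Illustration only: turn on shader-debugger trapping for one process. */
static int example_enable_debug_trap(struct amdgpu_device *adev,
				     uint64_t proc_ctx_gpu_addr)
{
	const uint32_t tcp_watch_cntl[4] = { 0 };	/* placeholder: no watchpoints */

	return amdgpu_mes_set_shader_debugger(adev, proc_ctx_gpu_addr,
					      0,		/* spi_gdbg_per_vmid_cntl */
					      tcp_watch_cntl,
					      0,		/* flags */
					      true);		/* trap_en */
}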
|
||||
|
||||
static void
|
||||
amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring,
|
||||
|
@ -1305,14 +1342,9 @@ static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
|
|||
if (!ring)
|
||||
continue;
|
||||
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
DRM_DEV_ERROR(ring->adev->dev,
|
||||
"ring %s test failed (%d)\n",
|
||||
ring->name, r);
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
} else
|
||||
DRM_INFO("ring %s test pass\n", ring->name);
|
||||
|
||||
r = amdgpu_ring_test_ib(ring, 1000 * 10);
|
||||
if (r) {
|
||||
|
|
|
@ -219,6 +219,8 @@ struct mes_add_queue_input {
|
|||
uint32_t gws_size;
|
||||
uint64_t tba_addr;
|
||||
uint64_t tma_addr;
|
||||
uint32_t trap_en;
|
||||
uint32_t skip_process_ctx_clear;
|
||||
uint32_t is_kfd_process;
|
||||
uint32_t is_aql_queue;
|
||||
uint32_t queue_size;
|
||||
|
@ -256,6 +258,7 @@ enum mes_misc_opcode {
|
|||
MES_MISC_OP_READ_REG,
|
||||
MES_MISC_OP_WRM_REG_WAIT,
|
||||
MES_MISC_OP_WRM_REG_WR_WAIT,
|
||||
MES_MISC_OP_SET_SHADER_DEBUGGER,
|
||||
};
|
||||
|
||||
struct mes_misc_op_input {
|
||||
|
@ -278,6 +281,21 @@ struct mes_misc_op_input {
|
|||
uint32_t reg0;
|
||||
uint32_t reg1;
|
||||
} wrm_reg;
|
||||
|
||||
struct {
|
||||
uint64_t process_context_addr;
|
||||
union {
|
||||
struct {
|
||||
uint64_t single_memop : 1;
|
||||
uint64_t single_alu_op : 1;
|
||||
uint64_t reserved: 30;
|
||||
};
|
||||
uint32_t u32all;
|
||||
} flags;
|
||||
uint32_t spi_gdbg_per_vmid_cntl;
|
||||
uint32_t tcp_watch_cntl[4];
|
||||
uint32_t trap_en;
|
||||
} set_shader_debugger;
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -340,6 +358,12 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
|
|||
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
|
||||
uint32_t reg0, uint32_t reg1,
|
||||
uint32_t ref, uint32_t mask);
|
||||
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
|
||||
uint64_t process_context_addr,
|
||||
uint32_t spi_gdbg_per_vmid_cntl,
|
||||
const uint32_t *tcp_watch_cntl,
|
||||
uint32_t flags,
|
||||
bool trap_en);
|
||||
|
||||
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
|
||||
int queue_type, int idx,
|
||||
|
|
|
@ -21,6 +21,29 @@
|
|||
#ifndef __AMDGPU_MMHUB_H__
|
||||
#define __AMDGPU_MMHUB_H__
|
||||
|
||||
enum amdgpu_mmhub_ras_memory_id {
|
||||
AMDGPU_MMHUB_WGMI_PAGEMEM = 0,
|
||||
AMDGPU_MMHUB_RGMI_PAGEMEM = 1,
|
||||
AMDGPU_MMHUB_WDRAM_PAGEMEM = 2,
|
||||
AMDGPU_MMHUB_RDRAM_PAGEMEM = 3,
|
||||
AMDGPU_MMHUB_WIO_CMDMEM = 4,
|
||||
AMDGPU_MMHUB_RIO_CMDMEM = 5,
|
||||
AMDGPU_MMHUB_WGMI_CMDMEM = 6,
|
||||
AMDGPU_MMHUB_RGMI_CMDMEM = 7,
|
||||
AMDGPU_MMHUB_WDRAM_CMDMEM = 8,
|
||||
AMDGPU_MMHUB_RDRAM_CMDMEM = 9,
|
||||
AMDGPU_MMHUB_MAM_DMEM0 = 10,
|
||||
AMDGPU_MMHUB_MAM_DMEM1 = 11,
|
||||
AMDGPU_MMHUB_MAM_DMEM2 = 12,
|
||||
AMDGPU_MMHUB_MAM_DMEM3 = 13,
|
||||
AMDGPU_MMHUB_WRET_TAGMEM = 19,
|
||||
AMDGPU_MMHUB_RRET_TAGMEM = 20,
|
||||
AMDGPU_MMHUB_WIO_DATAMEM = 21,
|
||||
AMDGPU_MMHUB_WGMI_DATAMEM = 22,
|
||||
AMDGPU_MMHUB_WDRAM_DATAMEM = 23,
|
||||
AMDGPU_MMHUB_MEMORY_BLOCK_LAST,
|
||||
};
|
||||
|
||||
struct amdgpu_mmhub_ras {
|
||||
struct amdgpu_ras_block_object ras_block;
|
||||
};
|
||||
|
|
|
@ -61,6 +61,7 @@ struct amdgpu_nbio_funcs {
|
|||
u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
|
||||
u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
|
||||
u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
|
||||
u32 (*get_pcie_index_hi_offset)(struct amdgpu_device *adev);
|
||||
u32 (*get_pcie_port_index_offset)(struct amdgpu_device *adev);
|
||||
u32 (*get_pcie_port_data_offset)(struct amdgpu_device *adev);
|
||||
u32 (*get_rev_id)(struct amdgpu_device *adev);
|
||||
|
@ -95,6 +96,11 @@ struct amdgpu_nbio_funcs {
|
|||
void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
|
||||
void (*clear_doorbell_interrupt)(struct amdgpu_device *adev);
|
||||
u32 (*get_rom_offset)(struct amdgpu_device *adev);
|
||||
int (*get_compute_partition_mode)(struct amdgpu_device *adev);
|
||||
u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
|
||||
u32 *supp_modes);
|
||||
void (*set_compute_partition_mode)(struct amdgpu_device *adev,
|
||||
enum amdgpu_gfx_partition mode);
|
||||
};
|
||||
|
||||
struct amdgpu_nbio {
|
||||
|
|
|
@ -79,9 +79,10 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo)
|
|||
static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
|
||||
{
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
|
||||
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
|
||||
struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo;
|
||||
struct amdgpu_bo_vm *vmbo;
|
||||
|
||||
bo = shadow_bo->parent;
|
||||
vmbo = to_amdgpu_bo_vm(bo);
|
||||
/* in case amdgpu_device_recover_vram got a NULL bo->parent */
|
||||
if (!list_empty(&vmbo->shadow_list)) {
|
||||
|
@ -130,15 +131,25 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
|
|||
u32 c = 0;
|
||||
|
||||
if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
|
||||
unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
|
||||
unsigned int visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
|
||||
int8_t mem_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
|
||||
|
||||
places[c].fpfn = 0;
|
||||
places[c].lpfn = 0;
|
||||
if (adev->gmc.mem_partitions && mem_id >= 0) {
|
||||
places[c].fpfn = adev->gmc.mem_partitions[mem_id].range.fpfn;
|
||||
/*
|
||||
* memory partition range lpfn is inclusive start + size - 1
|
||||
* TTM place lpfn is exclusive start + size
|
||||
*/
|
||||
places[c].lpfn = adev->gmc.mem_partitions[mem_id].range.lpfn + 1;
|
||||
} else {
|
||||
places[c].fpfn = 0;
|
||||
places[c].lpfn = 0;
|
||||
}
|
||||
places[c].mem_type = TTM_PL_VRAM;
|
||||
places[c].flags = 0;
|
||||
|
||||
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
|
||||
places[c].lpfn = visible_pfn;
|
||||
places[c].lpfn = min_not_zero(places[c].lpfn, visible_pfn);
|
||||
else if (adev->gmc.real_vram_size != adev->gmc.visible_vram_size)
|
||||
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
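A small standalone sketch of the inclusive-to-exclusive conversion noted in the comment above, plus the min_not_zero() clamp for CPU-accessible BOs. The partition range and visible window are made up, and min_not_zero() is re-implemented locally purely for illustration.

#include <stdio.h>

/* simplified local stand-in for the kernel's min_not_zero() helper */
#define min_not_zero(x, y) ((x) == 0 ? (y) : ((y) == 0 ? (x) : ((x) < (y) ? (x) : (y))))

int main(void)
{
	/* hypothetical memory partition: pages 0x40000..0x7ffff inclusive */
	unsigned long range_fpfn = 0x40000, range_lpfn = 0x7ffff;
	unsigned long visible_pfn = 0x60000;	/* made-up CPU-visible VRAM limit */

	unsigned long fpfn = range_fpfn;
	unsigned long lpfn = range_lpfn + 1;	/* TTM lpfn is exclusive: start + size */

	/* CPU-accessible BOs are additionally clamped to the visible window */
	lpfn = min_not_zero(lpfn, visible_pfn);

	printf("fpfn=0x%lx lpfn=0x%lx\n", fpfn, lpfn);	/* 0x40000, 0x60000 */
	return 0;
}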
|
||||
|
||||
|
@ -574,6 +585,13 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
|
|||
|
||||
bo->flags = bp->flags;
|
||||
|
||||
if (adev->gmc.mem_partitions)
|
||||
/* For GPUs with spatial partitioning, bo->xcp_id=-1 means any partition */
|
||||
bo->xcp_id = bp->xcp_id_plus1 - 1;
|
||||
else
|
||||
/* For GPUs without spatial partitioning */
|
||||
bo->xcp_id = 0;
|
||||
|
||||
if (!amdgpu_bo_support_uswc(bo->flags))
|
||||
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
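A quick illustration of the xcp_id_plus1 convention used a few lines above (0 means "any partition", n+1 means partition n). decode_xcp_id() is a hypothetical helper written only for this sketch; the values are made up.

#include <stdio.h>
#include <stdint.h>

/* mirrors the bo->xcp_id assignment in amdgpu_bo_create(), for illustration only */
static int8_t decode_xcp_id(int8_t xcp_id_plus1, int has_partitions)
{
	if (has_partitions)
		return xcp_id_plus1 - 1;	/* -1 == any partition */
	return 0;				/* unpartitioned ASICs always use 0 */
}

int main(void)
{
	printf("%d\n", decode_xcp_id(0, 1));	/* -1: caller did not pick a partition */
	printf("%d\n", decode_xcp_id(3, 1));	/*  2: caller asked for partition 2 */
	printf("%d\n", decode_xcp_id(0, 0));	/*  0: no spatial partitioning */
	return 0;
}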
|
||||
|
||||
|
@ -610,7 +628,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
|
|||
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
|
||||
struct dma_fence *fence;
|
||||
|
||||
r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence);
|
||||
r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
|
||||
if (unlikely(r))
|
||||
goto fail_unreserve;
|
||||
|
||||
|
@ -694,11 +712,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
|
|||
return r;
|
||||
|
||||
*vmbo_ptr = to_amdgpu_bo_vm(bo_ptr);
|
||||
INIT_LIST_HEAD(&(*vmbo_ptr)->shadow_list);
|
||||
/* Set destroy callback to amdgpu_bo_vm_destroy after vmbo->shadow_list
|
||||
* is initialized.
|
||||
*/
|
||||
bo_ptr->tbo.destroy = &amdgpu_bo_vm_destroy;
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -715,6 +728,8 @@ void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo)
|
|||
|
||||
mutex_lock(&adev->shadow_list_lock);
|
||||
list_add_tail(&vmbo->shadow_list, &adev->shadow_list);
|
||||
vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo);
|
||||
vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy;
|
||||
mutex_unlock(&adev->shadow_list_lock);
|
||||
}
|
||||
|
||||
|
@ -935,7 +950,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
|
|||
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
|
||||
amdgpu_bo_placement_from_domain(bo, domain);
|
||||
for (i = 0; i < bo->placement.num_placement; i++) {
|
||||
unsigned fpfn, lpfn;
|
||||
unsigned int fpfn, lpfn;
|
||||
|
||||
fpfn = min_offset >> PAGE_SHIFT;
|
||||
lpfn = max_offset >> PAGE_SHIFT;
|
||||
|
@ -1016,7 +1031,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
|
|||
}
|
||||
}
|
||||
|
||||
static const char *amdgpu_vram_names[] = {
|
||||
static const char * const amdgpu_vram_names[] = {
|
||||
"UNKNOWN",
|
||||
"GDDR1",
|
||||
"DDR2",
|
||||
|
@ -1044,7 +1059,7 @@ static const char *amdgpu_vram_names[] = {
|
|||
int amdgpu_bo_init(struct amdgpu_device *adev)
|
||||
{
|
||||
/* On A+A platform, VRAM can be mapped as WB */
|
||||
if (!adev->gmc.xgmi.connected_to_cpu) {
|
||||
if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
|
||||
/* reserve PAT memory space to WC for VRAM */
|
||||
int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base,
|
||||
adev->gmc.aper_size);
|
||||
|
@ -1080,8 +1095,7 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)
|
|||
amdgpu_ttm_fini(adev);
|
||||
|
||||
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
|
||||
|
||||
if (!adev->gmc.xgmi.connected_to_cpu) {
|
||||
if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
|
||||
arch_phys_wc_del(adev->gmc.vram_mtrr);
|
||||
arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
|
||||
}
|
||||
|
@ -1148,8 +1162,8 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
|
|||
* Returns:
|
||||
* 0 for success or a negative error code on failure.
|
||||
*/
|
||||
int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
|
||||
uint32_t metadata_size, uint64_t flags)
|
||||
int amdgpu_bo_set_metadata(struct amdgpu_bo *bo, void *metadata,
|
||||
u32 metadata_size, uint64_t flags)
|
||||
{
|
||||
struct amdgpu_bo_user *ubo;
|
||||
void *buffer;
|
||||
|
@ -1338,7 +1352,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
|
|||
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
|
||||
return;
|
||||
|
||||
r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
|
||||
r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
|
||||
if (!WARN_ON(r)) {
|
||||
amdgpu_bo_fence(abo, fence, false);
|
||||
dma_fence_put(fence);
|
||||
|
|
|
@ -56,6 +56,8 @@ struct amdgpu_bo_param {
|
|||
bool no_wait_gpu;
|
||||
struct dma_resv *resv;
|
||||
void (*destroy)(struct ttm_buffer_object *bo);
|
||||
/* xcp partition number plus 1, 0 means any partition */
|
||||
int8_t xcp_id_plus1;
|
||||
};
|
||||
|
||||
/* bo virtual addresses in a vm */
|
||||
|
@ -108,6 +110,13 @@ struct amdgpu_bo {
|
|||
struct mmu_interval_notifier notifier;
|
||||
#endif
|
||||
struct kgd_mem *kfd_bo;
|
||||
|
||||
/*
|
||||
* For GPUs with spatial partitioning, xcp partition number, -1 means
|
||||
* any partition. For other ASICs without spatial partition, always 0
|
||||
* for memory accounting.
|
||||
*/
|
||||
int8_t xcp_id;
|
||||
};
|
||||
|
||||
struct amdgpu_bo_user {
|
||||
|
|
|
@ -146,6 +146,9 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
|
|||
case IP_VERSION(13, 0, 0):
|
||||
adev->virt.autoload_ucode_id = 0;
|
||||
break;
|
||||
case IP_VERSION(13, 0, 6):
|
||||
ret = psp_init_cap_microcode(psp, ucode_prefix);
|
||||
break;
|
||||
case IP_VERSION(13, 0, 10):
|
||||
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
|
||||
ret = psp_init_cap_microcode(psp, ucode_prefix);
|
||||
|
@ -329,6 +332,9 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
|
|||
bool ret = false;
|
||||
int i;
|
||||
|
||||
if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6))
|
||||
return false;
|
||||
|
||||
db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET;
|
||||
db_dir_pos = db_header_pos + sizeof(struct psp_runtime_data_header);
|
||||
|
||||
|
@ -411,7 +417,7 @@ static int psp_sw_init(void *handle)
|
|||
if ((psp_get_runtime_db_entry(adev,
|
||||
PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS,
|
||||
&scpm_entry)) &&
|
||||
(SCPM_DISABLE != scpm_entry.scpm_status)) {
|
||||
(scpm_entry.scpm_status != SCPM_DISABLE)) {
|
||||
adev->scpm_enabled = true;
|
||||
adev->scpm_status = scpm_entry.scpm_status;
|
||||
} else {
|
||||
|
@ -458,10 +464,9 @@ static int psp_sw_init(void *handle)
|
|||
|
||||
if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) ||
|
||||
adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) {
|
||||
ret= psp_sysfs_init(adev);
|
||||
if (ret) {
|
||||
ret = psp_sysfs_init(adev);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
|
||||
|
@ -474,7 +479,8 @@ static int psp_sw_init(void *handle)
|
|||
return ret;
|
||||
|
||||
ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
AMDGPU_GEM_DOMAIN_VRAM |
|
||||
AMDGPU_GEM_DOMAIN_GTT,
|
||||
&psp->fence_buf_bo,
|
||||
&psp->fence_buf_mc_addr,
|
||||
&psp->fence_buf);
|
||||
|
@ -482,7 +488,8 @@ static int psp_sw_init(void *handle)
|
|||
goto failed1;
|
||||
|
||||
ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
AMDGPU_GEM_DOMAIN_VRAM |
|
||||
AMDGPU_GEM_DOMAIN_GTT,
|
||||
&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
|
||||
(void **)&psp->cmd_buf_mem);
|
||||
if (ret)
|
||||
|
@ -520,6 +527,8 @@ static int psp_sw_fini(void *handle)
|
|||
kfree(cmd);
|
||||
cmd = NULL;
|
||||
|
||||
psp_free_shared_bufs(psp);
|
||||
|
||||
if (psp->km_ring.ring_mem)
|
||||
amdgpu_bo_free_kernel(&adev->firmware.rbuf,
|
||||
&psp->km_ring.ring_mem_mc_addr,
|
||||
|
@ -643,7 +652,7 @@ psp_cmd_submit_buf(struct psp_context *psp,
|
|||
skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED ||
|
||||
psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev);
|
||||
|
||||
memcpy((void*)&cmd->resp, (void*)&psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp));
|
||||
memcpy(&cmd->resp, &psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp));
|
||||
|
||||
/* In some cases, psp response status is not 0 even there is no
|
||||
* problem while the command is submitted. Some version of PSP FW
|
||||
|
@ -699,8 +708,13 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
|
|||
uint64_t tmr_mc, struct amdgpu_bo *tmr_bo)
|
||||
{
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
uint32_t size = amdgpu_bo_size(tmr_bo);
|
||||
uint64_t tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo);
|
||||
uint32_t size = 0;
|
||||
uint64_t tmr_pa = 0;
|
||||
|
||||
if (tmr_bo) {
|
||||
size = amdgpu_bo_size(tmr_bo);
|
||||
tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo);
|
||||
}
|
||||
|
||||
if (amdgpu_sriov_vf(psp->adev))
|
||||
cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
|
||||
|
@ -745,6 +759,16 @@ static int psp_load_toc(struct psp_context *psp,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static bool psp_boottime_tmr(struct psp_context *psp)
|
||||
{
|
||||
switch (psp->adev->ip_versions[MP0_HWIP][0]) {
|
||||
case IP_VERSION(13, 0, 6):
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Set up Trusted Memory Region */
|
||||
static int psp_tmr_init(struct psp_context *psp)
|
||||
{
|
||||
|
@ -816,8 +840,9 @@ static int psp_tmr_load(struct psp_context *psp)
|
|||
cmd = acquire_psp_cmd_buf(psp);
|
||||
|
||||
psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo);
|
||||
DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
|
||||
amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
|
||||
if (psp->tmr_bo)
|
||||
DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
|
||||
amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
psp->fence_buf_mc_addr);
|
||||
|
@ -828,7 +853,7 @@ static int psp_tmr_load(struct psp_context *psp)
|
|||
}
|
||||
|
||||
static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp,
|
||||
struct psp_gfx_cmd_resp *cmd)
|
||||
struct psp_gfx_cmd_resp *cmd)
|
||||
{
|
||||
if (amdgpu_sriov_vf(psp->adev))
|
||||
cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR;
|
||||
|
@ -969,6 +994,27 @@ static int psp_rl_load(struct amdgpu_device *adev)
|
|||
return ret;
|
||||
}
|
||||
|
||||
int psp_spatial_partition(struct psp_context *psp, int mode)
|
||||
{
|
||||
struct psp_gfx_cmd_resp *cmd;
|
||||
int ret;
|
||||
|
||||
if (amdgpu_sriov_vf(psp->adev))
|
||||
return 0;
|
||||
|
||||
cmd = acquire_psp_cmd_buf(psp);
|
||||
|
||||
cmd->cmd_id = GFX_CMD_ID_SRIOV_SPATIAL_PART;
|
||||
cmd->cmd.cmd_spatial_part.mode = mode;
|
||||
|
||||
dev_info(psp->adev->dev, "Requesting %d partitions through PSP", mode);
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
|
||||
|
||||
release_psp_cmd_buf(psp);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psp_asd_initialize(struct psp_context *psp)
|
||||
{
|
||||
int ret;
|
||||
|
@ -1065,7 +1111,7 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
|||
struct ta_context *context)
|
||||
{
|
||||
cmd->cmd_id = context->ta_load_type;
|
||||
cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc);
|
||||
cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc);
|
||||
cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc);
|
||||
cmd->cmd.cmd_load_ta.app_len = context->bin_desc.size_bytes;
|
||||
|
||||
|
@ -1136,9 +1182,8 @@ int psp_ta_load(struct psp_context *psp, struct ta_context *context)
|
|||
|
||||
context->resp_status = cmd->resp.status;
|
||||
|
||||
if (!ret) {
|
||||
if (!ret)
|
||||
context->session_id = cmd->resp.session_id;
|
||||
}
|
||||
|
||||
release_psp_cmd_buf(psp);
|
||||
|
||||
|
@ -1254,8 +1299,9 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)
|
|||
|
||||
static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp)
|
||||
{
|
||||
return psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) &&
|
||||
psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b;
|
||||
return (psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) &&
|
||||
psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b) ||
|
||||
psp->adev->ip_versions[MP0_HWIP][0] >= IP_VERSION(13, 0, 6);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1363,6 +1409,9 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
|
|||
/* Invoke xgmi ta again to get the link information */
|
||||
if (psp_xgmi_peer_link_info_supported(psp)) {
|
||||
struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output;
|
||||
bool requires_reflection =
|
||||
(psp->xgmi_context.supports_extended_data && get_extended_data) ||
|
||||
psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6);
|
||||
|
||||
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS;
|
||||
|
||||
|
@ -1377,11 +1426,11 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
|
|||
topology->nodes[i].num_links = get_extended_data ?
|
||||
topology->nodes[i].num_links +
|
||||
link_info_output->nodes[i].num_links :
|
||||
link_info_output->nodes[i].num_links;
|
||||
((requires_reflection && topology->nodes[i].num_links) ? topology->nodes[i].num_links :
|
||||
link_info_output->nodes[i].num_links);
|
||||
|
||||
/* reflect the topology information for bi-directionality */
|
||||
if (psp->xgmi_context.supports_extended_data &&
|
||||
get_extended_data && topology->nodes[i].num_hops)
|
||||
if (requires_reflection && topology->nodes[i].num_hops)
|
||||
psp_xgmi_reflect_topology_info(psp, topology->nodes[i]);
|
||||
}
|
||||
}
|
||||
|
@ -1465,8 +1514,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
|
|||
if (amdgpu_ras_intr_triggered())
|
||||
return ret;
|
||||
|
||||
if (ras_cmd->if_version > RAS_TA_HOST_IF_VER)
|
||||
{
|
||||
if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) {
|
||||
DRM_WARN("RAS: Unsupported Interface");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -1476,8 +1524,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
|
|||
dev_warn(psp->adev->dev, "ECC switch disabled\n");
|
||||
|
||||
ras_cmd->ras_status = TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE;
|
||||
}
|
||||
else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
|
||||
} else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
|
||||
dev_warn(psp->adev->dev,
|
||||
"RAS internal register access blocked\n");
|
||||
|
||||
|
@ -1573,11 +1620,10 @@ int psp_ras_initialize(struct psp_context *psp)
|
|||
if (ret)
|
||||
dev_warn(adev->dev, "PSP set boot config failed\n");
|
||||
else
|
||||
dev_warn(adev->dev, "GECC will be disabled in next boot cycle "
|
||||
"if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
|
||||
dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
|
||||
}
|
||||
} else {
|
||||
if (1 == boot_cfg) {
|
||||
if (boot_cfg == 1) {
|
||||
dev_info(adev->dev, "GECC is enabled\n");
|
||||
} else {
|
||||
/* enable GECC in next boot cycle if it is disabled
|
||||
|
@ -1609,6 +1655,8 @@ int psp_ras_initialize(struct psp_context *psp)
|
|||
ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
|
||||
if (!adev->gmc.xgmi.connected_to_cpu)
|
||||
ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
|
||||
ras_cmd->ras_in_message.init_flags.xcc_mask =
|
||||
adev->gfx.xcc_mask;
|
||||
|
||||
ret = psp_ta_load(psp, &psp->ras_context.context);
|
||||
|
||||
|
@ -1626,14 +1674,37 @@ int psp_ras_initialize(struct psp_context *psp)
|
|||
}
|
||||
|
||||
int psp_ras_trigger_error(struct psp_context *psp,
|
||||
struct ta_ras_trigger_error_input *info)
|
||||
struct ta_ras_trigger_error_input *info, uint32_t instance_mask)
|
||||
{
|
||||
struct ta_ras_shared_memory *ras_cmd;
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
int ret;
|
||||
uint32_t dev_mask;
|
||||
|
||||
if (!psp->ras_context.context.initialized)
|
||||
return -EINVAL;
|
||||
|
||||
switch (info->block_id) {
|
||||
case TA_RAS_BLOCK__GFX:
|
||||
dev_mask = GET_MASK(GC, instance_mask);
|
||||
break;
|
||||
case TA_RAS_BLOCK__SDMA:
|
||||
dev_mask = GET_MASK(SDMA0, instance_mask);
|
||||
break;
|
||||
case TA_RAS_BLOCK__VCN:
|
||||
case TA_RAS_BLOCK__JPEG:
|
||||
dev_mask = GET_MASK(VCN, instance_mask);
|
||||
break;
|
||||
default:
|
||||
dev_mask = instance_mask;
|
||||
break;
|
||||
}
|
||||
|
||||
/* reuse sub_block_index for backward compatibility */
|
||||
dev_mask <<= AMDGPU_RAS_INST_SHIFT;
|
||||
dev_mask &= AMDGPU_RAS_INST_MASK;
|
||||
info->sub_block_index |= dev_mask;
|
||||
|
||||
ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
|
||||
memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
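A worked example of the instance-mask packing shown above. The shift and mask values come from the amdgpu_ras.h change later in this series; the GET_MASK() translation is not shown in this hunk, so the dev_mask value below is simply assumed to be instances 0 and 1.

#include <stdint.h>
#include <stdio.h>

#define AMDGPU_RAS_INST_MASK  0xfffff000
#define AMDGPU_RAS_INST_SHIFT 0xc

int main(void)
{
	uint32_t dev_mask = 0x3;	/* pretend GET_MASK() selected instances 0 and 1 */
	uint32_t sub_block_index = 0x0;	/* made-up original sub-block value */

	/* pack the instance mask into the upper bits, keeping the low 12 bits
	 * for the sub block, exactly as done above */
	dev_mask <<= AMDGPU_RAS_INST_SHIFT;
	dev_mask &= AMDGPU_RAS_INST_MASK;
	sub_block_index |= dev_mask;

	printf("sub_block_index = 0x%08x\n", sub_block_index);	/* 0x00003000 */
	return 0;
}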
|
||||
|
||||
|
@ -2077,10 +2148,12 @@ static int psp_hw_start(struct psp_context *psp)
|
|||
if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
|
||||
goto skip_pin_bo;
|
||||
|
||||
ret = psp_tmr_init(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP tmr init failed!\n");
|
||||
return ret;
|
||||
if (!psp_boottime_tmr(psp)) {
|
||||
ret = psp_tmr_init(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP tmr init failed!\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
skip_pin_bo:
|
||||
|
@ -2363,7 +2436,7 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
|
|||
}
|
||||
|
||||
static int psp_execute_non_psp_fw_load(struct psp_context *psp,
|
||||
struct amdgpu_firmware_info *ucode)
|
||||
struct amdgpu_firmware_info *ucode)
|
||||
{
|
||||
int ret = 0;
|
||||
struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
|
||||
|
@ -2402,9 +2475,8 @@ static int psp_load_smu_fw(struct psp_context *psp)
|
|||
(adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) ||
|
||||
adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 2)))) {
|
||||
ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
|
||||
if (ret) {
|
||||
if (ret)
|
||||
DRM_WARN("Failed to set MP1 state prepare for reload\n");
|
||||
}
|
||||
}
|
||||
|
||||
ret = psp_execute_non_psp_fw_load(psp, ucode);
|
||||
|
@ -2655,8 +2727,6 @@ static int psp_hw_fini(void *handle)
|
|||
|
||||
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
|
||||
|
||||
psp_free_shared_bufs(psp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2716,9 +2786,8 @@ static int psp_suspend(void *handle)
|
|||
}
|
||||
|
||||
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
|
||||
if (ret) {
|
||||
if (ret)
|
||||
DRM_ERROR("PSP ring stop failed\n");
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
|
@ -2967,7 +3036,7 @@ static int parse_sos_bin_descriptor(struct psp_context *psp,
|
|||
psp->sos.fw_version = le32_to_cpu(desc->fw_version);
|
||||
psp->sos.feature_version = le32_to_cpu(desc->fw_version);
|
||||
psp->sos.size_bytes = le32_to_cpu(desc->size_bytes);
|
||||
psp->sos.start_addr = ucode_start_addr;
|
||||
psp->sos.start_addr = ucode_start_addr;
|
||||
break;
|
||||
case PSP_FW_TYPE_PSP_SYS_DRV:
|
||||
psp->sys.fw_version = le32_to_cpu(desc->fw_version);
|
||||
|
@ -3491,7 +3560,7 @@ void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size
|
|||
drm_dev_exit(idx);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(usbc_pd_fw, S_IRUGO | S_IWUSR,
|
||||
static DEVICE_ATTR(usbc_pd_fw, 0644,
|
||||
psp_usbc_pd_fw_sysfs_read,
|
||||
psp_usbc_pd_fw_sysfs_write);
|
||||
|
||||
|
@ -3548,6 +3617,9 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj,
|
|||
void *fw_pri_cpu_addr;
|
||||
int ret;
|
||||
|
||||
if (adev->psp.vbflash_image_size == 0)
|
||||
return -EINVAL;
|
||||
|
||||
dev_info(adev->dev, "VBIOS flash to PSP started");
|
||||
|
||||
ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size,
|
||||
|
@ -3599,13 +3671,13 @@ static ssize_t amdgpu_psp_vbflash_status(struct device *dev,
|
|||
}
|
||||
|
||||
static const struct bin_attribute psp_vbflash_bin_attr = {
|
||||
.attr = {.name = "psp_vbflash", .mode = 0664},
|
||||
.attr = {.name = "psp_vbflash", .mode = 0660},
|
||||
.size = 0,
|
||||
.write = amdgpu_psp_vbflash_write,
|
||||
.read = amdgpu_psp_vbflash_read,
|
||||
};
|
||||
|
||||
static DEVICE_ATTR(psp_vbflash_status, 0444, amdgpu_psp_vbflash_status, NULL);
|
||||
static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL);
|
||||
|
||||
int amdgpu_psp_sysfs_init(struct amdgpu_device *adev)
|
||||
{
|
||||
|
@ -3618,6 +3690,7 @@ int amdgpu_psp_sysfs_init(struct amdgpu_device *adev)
|
|||
switch (adev->ip_versions[MP0_HWIP][0]) {
|
||||
case IP_VERSION(13, 0, 0):
|
||||
case IP_VERSION(13, 0, 7):
|
||||
case IP_VERSION(13, 0, 10):
|
||||
if (!psp->adev) {
|
||||
psp->adev = adev;
|
||||
psp_v13_0_set_psp_funcs(psp);
|
||||
|
@ -3673,8 +3746,7 @@ static void psp_sysfs_fini(struct amdgpu_device *adev)
|
|||
device_remove_file(adev->dev, &dev_attr_usbc_pd_fw);
|
||||
}
|
||||
|
||||
const struct amdgpu_ip_block_version psp_v3_1_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version psp_v3_1_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_PSP,
|
||||
.major = 3,
|
||||
.minor = 1,
|
||||
|
@ -3682,8 +3754,7 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block =
|
|||
.funcs = &psp_ip_funcs,
|
||||
};
|
||||
|
||||
const struct amdgpu_ip_block_version psp_v10_0_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version psp_v10_0_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_PSP,
|
||||
.major = 10,
|
||||
.minor = 0,
|
||||
|
@ -3691,8 +3762,7 @@ const struct amdgpu_ip_block_version psp_v10_0_ip_block =
|
|||
.funcs = &psp_ip_funcs,
|
||||
};
|
||||
|
||||
const struct amdgpu_ip_block_version psp_v11_0_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version psp_v11_0_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_PSP,
|
||||
.major = 11,
|
||||
.minor = 0,
|
||||
|
@ -3708,8 +3778,7 @@ const struct amdgpu_ip_block_version psp_v11_0_8_ip_block = {
|
|||
.funcs = &psp_ip_funcs,
|
||||
};
|
||||
|
||||
const struct amdgpu_ip_block_version psp_v12_0_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version psp_v12_0_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_PSP,
|
||||
.major = 12,
|
||||
.minor = 0,
|
||||
|
|
|
@ -486,7 +486,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
|
|||
int psp_ras_enable_features(struct psp_context *psp,
|
||||
union ta_ras_cmd_input *info, bool enable);
|
||||
int psp_ras_trigger_error(struct psp_context *psp,
|
||||
struct ta_ras_trigger_error_input *info);
|
||||
struct ta_ras_trigger_error_input *info, uint32_t instance_mask);
|
||||
int psp_ras_terminate(struct psp_context *psp);
|
||||
|
||||
int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
|
||||
|
@ -519,6 +519,8 @@ int psp_load_fw_list(struct psp_context *psp,
|
|||
struct amdgpu_firmware_info **ucode_list, int ucode_count);
|
||||
void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size);
|
||||
|
||||
int psp_spatial_partition(struct psp_context *psp, int mode);
|
||||
|
||||
int is_psp_fw_valid(struct psp_bin_desc bin);
|
||||
|
||||
int amdgpu_psp_sysfs_init(struct amdgpu_device *adev);
|
||||
|
|
|
@ -256,6 +256,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
|
|||
int block_id;
|
||||
uint32_t sub_block;
|
||||
u64 address, value;
|
||||
/* default value is 0 if the mask is not set by user */
|
||||
u32 instance_mask = 0;
|
||||
|
||||
if (*pos)
|
||||
return -EINVAL;
|
||||
|
@ -306,7 +308,11 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
|
|||
data->op = op;
|
||||
|
||||
if (op == 2) {
|
||||
if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
|
||||
if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx 0x%x",
|
||||
&sub_block, &address, &value, &instance_mask) != 4 &&
|
||||
sscanf(str, "%*s %*s %*s %u %llu %llu %u",
|
||||
&sub_block, &address, &value, &instance_mask) != 4 &&
|
||||
sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
|
||||
&sub_block, &address, &value) != 3 &&
|
||||
sscanf(str, "%*s %*s %*s %u %llu %llu",
|
||||
&sub_block, &address, &value) != 3)
|
||||
|
@ -314,6 +320,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
|
|||
data->head.sub_block_index = sub_block;
|
||||
data->inject.address = address;
|
||||
data->inject.value = value;
|
||||
data->inject.instance_mask = instance_mask;
|
||||
}
|
||||
} else {
|
||||
if (size < sizeof(*data))
|
||||
|
@ -326,6 +333,46 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
|
||||
struct ras_debug_if *data)
|
||||
{
|
||||
int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
|
||||
uint32_t mask, inst_mask = data->inject.instance_mask;
|
||||
|
||||
/* no need to set instance mask if there is only one instance */
|
||||
if (num_xcc <= 1 && inst_mask) {
|
||||
data->inject.instance_mask = 0;
|
||||
dev_dbg(adev->dev,
|
||||
"RAS inject mask(0x%x) isn't supported and force it to 0.\n",
|
||||
inst_mask);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
switch (data->head.block) {
|
||||
case AMDGPU_RAS_BLOCK__GFX:
|
||||
mask = GENMASK(num_xcc - 1, 0);
|
||||
break;
|
||||
case AMDGPU_RAS_BLOCK__SDMA:
|
||||
mask = GENMASK(adev->sdma.num_instances - 1, 0);
|
||||
break;
|
||||
case AMDGPU_RAS_BLOCK__VCN:
|
||||
case AMDGPU_RAS_BLOCK__JPEG:
|
||||
mask = GENMASK(adev->vcn.num_vcn_inst - 1, 0);
|
||||
break;
|
||||
default:
|
||||
mask = inst_mask;
|
||||
break;
|
||||
}
|
||||
|
||||
/* remove invalid bits in instance mask */
|
||||
data->inject.instance_mask &= mask;
|
||||
if (inst_mask != data->inject.instance_mask)
|
||||
dev_dbg(adev->dev,
|
||||
"Adjust RAS inject mask 0x%x to 0x%x\n",
|
||||
inst_mask, data->inject.instance_mask);
|
||||
}
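A standalone sketch of the mask clamping performed by amdgpu_ras_instance_mask_check() above. GENMASK() is re-implemented here as a simplified 32-bit stand-in and the instance counts are made up.

#include <stdint.h>
#include <stdio.h>

/* simplified local stand-in for the kernel's GENMASK() */
#define GENMASK(h, l) (((~0u) << (l)) & (~0u >> (31 - (h))))

int main(void)
{
	int num_xcc = 4;			/* made-up: four GFX XCC instances */
	uint32_t user_mask = 0x13;		/* user asked for instances 0, 1 and 4 */
	uint32_t valid = GENMASK(num_xcc - 1, 0);	/* 0xf */

	/* invalid bits are silently dropped, as in the function above */
	uint32_t adjusted = user_mask & valid;

	printf("valid=0x%x adjusted=0x%x\n", valid, adjusted);	/* 0xf, 0x3 */
	return 0;
}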
|
||||
|
||||
/**
|
||||
* DOC: AMDGPU RAS debugfs control interface
|
||||
*
|
||||
|
@ -341,7 +388,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
|
|||
* sub_block_index: some IPs have subcomponents, say, GFX, SDMA.
|
||||
* name: the name of IP.
|
||||
*
|
||||
* inject has two more members than head, they are address, value.
|
||||
* inject has three more members than head: address, value and mask.
|
||||
* As their names indicate, inject operation will write the
|
||||
* value to the address.
|
||||
*
|
||||
|
@ -365,7 +412,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
|
|||
*
|
||||
* echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
|
||||
* echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
|
||||
* echo "inject <block> <error> <sub-block> <address> <value> > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
|
||||
* echo "inject <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
|
||||
*
|
||||
* Where N, is the card which you want to affect.
|
||||
*
|
||||
|
@ -382,13 +429,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
|
|||
*
|
||||
* The sub-block is the sub-block index; pass 0 if there is no sub-block.
|
||||
* The address and value are hexadecimal numbers, leading 0x is optional.
|
||||
* The mask is the instance mask; it is optional and defaults to 0x1.
|
||||
*
|
||||
* For instance,
|
||||
*
|
||||
* .. code-block:: bash
|
||||
*
|
||||
* echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
|
||||
* echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
|
||||
* echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl
|
||||
* echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
|
||||
*
|
||||
* How to check the result of the operation?
|
||||
|
@ -460,6 +508,8 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
|
|||
break;
|
||||
}
|
||||
|
||||
amdgpu_ras_instance_mask_check(adev, &data);
|
||||
|
||||
/* data.inject.address is offset instead of absolute gpu address */
|
||||
ret = amdgpu_ras_error_inject(adev, &data.inject);
|
||||
break;
|
||||
|
@ -1115,15 +1165,15 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
|
|||
block_info.address);
|
||||
}
|
||||
|
||||
if (info->head.block == AMDGPU_RAS_BLOCK__GFX) {
|
||||
if (block_obj->hw_ops->ras_error_inject)
|
||||
ret = block_obj->hw_ops->ras_error_inject(adev, info);
|
||||
if (block_obj->hw_ops->ras_error_inject) {
|
||||
if (info->head.block == AMDGPU_RAS_BLOCK__GFX)
|
||||
ret = block_obj->hw_ops->ras_error_inject(adev, info, info->instance_mask);
|
||||
else /* Special ras_error_inject is defined (e.g: xgmi) */
|
||||
ret = block_obj->hw_ops->ras_error_inject(adev, &block_info,
|
||||
info->instance_mask);
|
||||
} else {
|
||||
/* If defined special ras_error_inject(e.g: xgmi), implement special ras_error_inject */
|
||||
if (block_obj->hw_ops->ras_error_inject)
|
||||
ret = block_obj->hw_ops->ras_error_inject(adev, &block_info);
|
||||
else /*If not defined .ras_error_inject, use default ras_error_inject*/
|
||||
ret = psp_ras_trigger_error(&adev->psp, &block_info);
|
||||
/* default path */
|
||||
ret = psp_ras_trigger_error(&adev->psp, &block_info, info->instance_mask);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
|
@ -1597,8 +1647,7 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
|
|||
void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
|
||||
{
|
||||
/* Fatal error events are handled on host side */
|
||||
if (amdgpu_sriov_vf(adev) ||
|
||||
!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return;
|
||||
|
||||
if (adev->nbio.ras &&
|
||||
|
@ -2008,9 +2057,15 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
|
|||
/* Perform full reset in fatal error mode */
|
||||
if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
|
||||
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
|
||||
else
|
||||
else {
|
||||
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
|
||||
|
||||
if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
|
||||
ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
|
||||
reset_context.method = AMD_RESET_METHOD_MODE2;
|
||||
}
|
||||
}
|
||||
|
||||
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
|
||||
}
|
||||
atomic_set(&ras->in_recovery, 0);
|
||||
|
@ -2259,7 +2314,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
|
|||
atomic_set(&con->in_recovery, 0);
|
||||
con->eeprom_control.bad_channel_bitmap = 0;
|
||||
|
||||
max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count();
|
||||
max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
|
||||
amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
|
||||
|
||||
/* Todo: During test the SMU might fail to read the eeprom through I2C
|
||||
|
@ -2625,7 +2680,8 @@ release_con:
|
|||
|
||||
int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->gmc.xgmi.connected_to_cpu)
|
||||
if (adev->gmc.xgmi.connected_to_cpu ||
|
||||
adev->gmc.is_app_apu)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
@ -3104,3 +3160,143 @@ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name)
|
||||
{
|
||||
if (!err_type_name)
|
||||
return;
|
||||
|
||||
switch (err_type) {
|
||||
case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
|
||||
sprintf(err_type_name, "correctable");
|
||||
break;
|
||||
case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
|
||||
sprintf(err_type_name, "uncorrectable");
|
||||
break;
|
||||
default:
|
||||
sprintf(err_type_name, "unknown");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
|
||||
const struct amdgpu_ras_err_status_reg_entry *reg_entry,
|
||||
uint32_t instance,
|
||||
uint32_t *memory_id)
|
||||
{
|
||||
uint32_t err_status_lo_data, err_status_lo_offset;
|
||||
|
||||
if (!reg_entry)
|
||||
return false;
|
||||
|
||||
err_status_lo_offset =
|
||||
AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
|
||||
reg_entry->seg_lo, reg_entry->reg_lo);
|
||||
err_status_lo_data = RREG32(err_status_lo_offset);
|
||||
|
||||
if ((reg_entry->flags & AMDGPU_RAS_ERR_STATUS_VALID) &&
|
||||
!REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, ERR_STATUS_VALID_FLAG))
|
||||
return false;
|
||||
|
||||
*memory_id = REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, MEMORY_ID);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
|
||||
const struct amdgpu_ras_err_status_reg_entry *reg_entry,
|
||||
uint32_t instance,
|
||||
unsigned long *err_cnt)
|
||||
{
|
||||
uint32_t err_status_hi_data, err_status_hi_offset;
|
||||
|
||||
if (!reg_entry)
|
||||
return false;
|
||||
|
||||
err_status_hi_offset =
|
||||
AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
|
||||
reg_entry->seg_hi, reg_entry->reg_hi);
|
||||
err_status_hi_data = RREG32(err_status_hi_offset);
|
||||
|
||||
if ((reg_entry->flags & AMDGPU_RAS_ERR_INFO_VALID) &&
|
||||
!REG_GET_FIELD(err_status_hi_data, ERR_STATUS_HI, ERR_INFO_VALID_FLAG))
|
||||
/* keep the check here in case we need to refer to the result later */
|
||||
dev_dbg(adev->dev, "Invalid err_info field\n");
|
||||
|
||||
/* read err count */
|
||||
*err_cnt = REG_GET_FIELD(err_status_hi_data, ERR_STATUS, ERR_CNT);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
|
||||
const struct amdgpu_ras_err_status_reg_entry *reg_list,
|
||||
uint32_t reg_list_size,
|
||||
const struct amdgpu_ras_memory_id_entry *mem_list,
|
||||
uint32_t mem_list_size,
|
||||
uint32_t instance,
|
||||
uint32_t err_type,
|
||||
unsigned long *err_count)
|
||||
{
|
||||
uint32_t memory_id;
|
||||
unsigned long err_cnt;
|
||||
char err_type_name[16];
|
||||
uint32_t i, j;
|
||||
|
||||
for (i = 0; i < reg_list_size; i++) {
|
||||
/* query memory_id from err_status_lo */
|
||||
if (!amdgpu_ras_inst_get_memory_id_field(adev, ®_list[i],
|
||||
instance, &memory_id))
|
||||
continue;
|
||||
|
||||
/* query err_cnt from err_status_hi */
|
||||
if (!amdgpu_ras_inst_get_err_cnt_field(adev, ®_list[i],
|
||||
instance, &err_cnt) ||
|
||||
!err_cnt)
|
||||
continue;
|
||||
|
||||
*err_count += err_cnt;
|
||||
|
||||
/* log the errors */
|
||||
amdgpu_ras_get_error_type_name(err_type, err_type_name);
|
||||
if (!mem_list) {
|
||||
/* memory_list is not supported */
|
||||
dev_info(adev->dev,
|
||||
"%ld %s hardware errors detected in %s, instance: %d, memory_id: %d\n",
|
||||
err_cnt, err_type_name,
|
||||
reg_list[i].block_name,
|
||||
instance, memory_id);
|
||||
} else {
|
||||
for (j = 0; j < mem_list_size; j++) {
|
||||
if (memory_id == mem_list[j].memory_id) {
|
||||
dev_info(adev->dev,
|
||||
"%ld %s hardware errors detected in %s, instance: %d, memory block: %s\n",
|
||||
err_cnt, err_type_name,
|
||||
reg_list[i].block_name,
|
||||
instance, mem_list[j].name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
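A sketch of the memory_id-to-name lookup that amdgpu_ras_inst_query_ras_error_count() performs against a per-IP mem_list. The structure and block names below are hypothetical stand-ins, not the actual tables an IP block registers.

#include <stdio.h>
#include <stdint.h>

/* minimal stand-in for struct amdgpu_ras_memory_id_entry; names are made up */
struct memory_id_entry {
	uint32_t memory_id;
	const char *name;
};

static const struct memory_id_entry mem_list[] = {
	{ 0, "MMEA_WGMI_PAGEMEM" },	/* hypothetical entries */
	{ 2, "MMEA_WDRAM_PAGEMEM" },
};

int main(void)
{
	uint32_t memory_id = 2;		/* pretend this was decoded from err_status_lo */
	unsigned long err_cnt = 3;	/* pretend this was read from err_status_hi */

	for (size_t j = 0; j < sizeof(mem_list) / sizeof(mem_list[0]); j++) {
		if (memory_id == mem_list[j].memory_id) {
			printf("%lu errors in memory block: %s\n",
			       err_cnt, mem_list[j].name);
			break;
		}
	}
	return 0;
}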
|
||||
|
||||
void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
|
||||
const struct amdgpu_ras_err_status_reg_entry *reg_list,
|
||||
uint32_t reg_list_size,
|
||||
uint32_t instance)
|
||||
{
|
||||
uint32_t err_status_lo_offset, err_status_hi_offset;
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < reg_list_size; i++) {
|
||||
err_status_lo_offset =
|
||||
AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
|
||||
reg_list[i].seg_lo, reg_list[i].reg_lo);
|
||||
err_status_hi_offset =
|
||||
AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
|
||||
reg_list[i].seg_hi, reg_list[i].reg_hi);
|
||||
WREG32(err_status_lo_offset, 0);
|
||||
WREG32(err_status_hi_offset, 0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,11 @@
|
|||
struct amdgpu_iv_entry;
|
||||
|
||||
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
|
||||
/* position of instance value in sub_block_index of
|
||||
* ta_ras_trigger_error_input; the sub block uses the lower 12 bits
|
||||
*/
|
||||
#define AMDGPU_RAS_INST_MASK 0xfffff000
|
||||
#define AMDGPU_RAS_INST_SHIFT 0xc
|
||||
|
||||
enum amdgpu_ras_block {
|
||||
AMDGPU_RAS_BLOCK__UMC = 0,
|
||||
|
@ -314,6 +319,45 @@ enum amdgpu_ras_ret {
|
|||
AMDGPU_RAS_PT,
|
||||
};
|
||||
|
||||
/* ras error status register fields */
|
||||
#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0
|
||||
#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L
|
||||
#define ERR_STATUS_LO__MEMORY_ID__SHIFT 0x18
|
||||
#define ERR_STATUS_LO__MEMORY_ID_MASK 0xFF000000L
|
||||
#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG__SHIFT 0x2
|
||||
#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG_MASK 0x00000004L
|
||||
#define ERR_STATUS__ERR_CNT__SHIFT 0x17
|
||||
#define ERR_STATUS__ERR_CNT_MASK 0x03800000L
|
||||
|
||||
#define AMDGPU_RAS_REG_ENTRY(ip, inst, reg_lo, reg_hi) \
|
||||
ip##_HWIP, inst, reg_lo##_BASE_IDX, reg_lo, reg_hi##_BASE_IDX, reg_hi
|
||||
|
||||
#define AMDGPU_RAS_REG_ENTRY_OFFSET(hwip, ip_inst, segment, reg) \
|
||||
(adev->reg_offset[hwip][ip_inst][segment] + (reg))
|
||||
|
||||
#define AMDGPU_RAS_ERR_INFO_VALID (1 << 0)
|
||||
#define AMDGPU_RAS_ERR_STATUS_VALID (1 << 1)
|
||||
#define AMDGPU_RAS_ERR_ADDRESS_VALID (1 << 2)
|
||||
|
||||
#define AMDGPU_RAS_GPU_RESET_MODE2_RESET (0x1 << 0)
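A worked example of decoding the err_status_lo fields defined above (the same extraction REG_GET_FIELD() performs in amdgpu_ras_inst_get_memory_id_field()). The field macros are copied from the block above; the register readback value is made up.

#include <stdint.h>
#include <stdio.h>

#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0
#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK   0x00000001L
#define ERR_STATUS_LO__MEMORY_ID__SHIFT             0x18
#define ERR_STATUS_LO__MEMORY_ID_MASK               0xFF000000L

int main(void)
{
	uint32_t err_status_lo = 0x0B000001;	/* made-up register readback */

	uint32_t valid = (err_status_lo & ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK)
			 >> ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT;
	uint32_t memory_id = (err_status_lo & ERR_STATUS_LO__MEMORY_ID_MASK)
			     >> ERR_STATUS_LO__MEMORY_ID__SHIFT;

	/* equivalent of REG_GET_FIELD(err_status_lo, ERR_STATUS_LO, MEMORY_ID) */
	printf("valid=%u memory_id=0x%x\n", valid, memory_id);	/* 1, 0xb */
	return 0;
}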
|
||||
|
||||
struct amdgpu_ras_err_status_reg_entry {
|
||||
uint32_t hwip;
|
||||
uint32_t ip_inst;
|
||||
uint32_t seg_lo;
|
||||
uint32_t reg_lo;
|
||||
uint32_t seg_hi;
|
||||
uint32_t reg_hi;
|
||||
uint32_t reg_inst;
|
||||
uint32_t flags;
|
||||
const char *block_name;
|
||||
};
|
||||
|
||||
struct amdgpu_ras_memory_id_entry {
|
||||
uint32_t memory_id;
|
||||
const char *name;
|
||||
};
|
||||
|
||||
struct ras_common_if {
|
||||
enum amdgpu_ras_block block;
|
||||
enum amdgpu_ras_error_type type;
|
||||
|
@ -385,6 +429,9 @@ struct amdgpu_ras {
|
|||
|
||||
/* Indicates whether the SMU needs to update the bad channel info */
|
||||
bool update_channel_flag;
|
||||
|
||||
/* Record special requirements of gpu reset caller */
|
||||
uint32_t gpu_reset_flags;
|
||||
};
|
||||
|
||||
struct ras_fs_data {
|
||||
|
@ -471,6 +518,7 @@ struct ras_inject_if {
|
|||
struct ras_common_if head;
|
||||
uint64_t address;
|
||||
uint64_t value;
|
||||
uint32_t instance_mask;
|
||||
};
|
||||
|
||||
struct ras_cure_if {
|
||||
|
@ -508,7 +556,8 @@ struct amdgpu_ras_block_object {
|
|||
};
|
||||
|
||||
struct amdgpu_ras_block_hw_ops {
|
||||
int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
|
||||
int (*ras_error_inject)(struct amdgpu_device *adev,
|
||||
void *inject_if, uint32_t instance_mask);
|
||||
void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status);
|
||||
void (*query_ras_error_status)(struct amdgpu_device *adev);
|
||||
void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
|
||||
|
@ -696,4 +745,25 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co
|
|||
int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
|
||||
struct amdgpu_ras_block_object *ras_block_obj);
|
||||
void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev);
|
||||
void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name);
|
||||
bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
|
||||
const struct amdgpu_ras_err_status_reg_entry *reg_entry,
|
||||
uint32_t instance,
|
||||
uint32_t *memory_id);
|
||||
bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
|
||||
const struct amdgpu_ras_err_status_reg_entry *reg_entry,
|
||||
uint32_t instance,
|
||||
unsigned long *err_cnt);
|
||||
void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
|
||||
const struct amdgpu_ras_err_status_reg_entry *reg_list,
|
||||
uint32_t reg_list_size,
|
||||
const struct amdgpu_ras_memory_id_entry *mem_list,
|
||||
uint32_t mem_list_size,
|
||||
uint32_t instance,
|
||||
uint32_t err_type,
|
||||
unsigned long *err_count);
|
||||
void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
|
||||
const struct amdgpu_ras_err_status_reg_entry *reg_list,
|
||||
uint32_t reg_list_size,
|
||||
uint32_t instance);
|
||||
#endif
|
||||
|
|
|
@ -68,11 +68,24 @@
|
|||
|
||||
/* Table hdr is 'AMDR' */
|
||||
#define RAS_TABLE_HDR_VAL 0x414d4452
|
||||
#define RAS_TABLE_VER 0x00010000
|
||||
|
||||
/* Bad GPU tag ‘BADG’ */
|
||||
#define RAS_TABLE_HDR_BAD 0x42414447
|
||||
|
||||
/**
|
||||
* EEPROM Table structure v1
|
||||
* ---------------------------------
|
||||
* | |
|
||||
* | EEPROM TABLE HEADER |
|
||||
* | ( size 20 Bytes ) |
|
||||
* | |
|
||||
* ---------------------------------
|
||||
* | |
|
||||
* | BAD PAGE RECORD AREA |
|
||||
* | |
|
||||
* ---------------------------------
|
||||
*/
|
||||
|
||||
/* Assume 2-Mbit size EEPROM and take up the whole space. */
|
||||
#define RAS_TBL_SIZE_BYTES (256 * 1024)
|
||||
#define RAS_TABLE_START 0
|
||||
|
@ -81,6 +94,35 @@
|
|||
#define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
|
||||
/ RAS_TABLE_RECORD_SIZE)
|
||||
|
||||
/**
|
||||
* EEPROM Table structure v2.1
|
||||
* ---------------------------------
|
||||
* | |
|
||||
* | EEPROM TABLE HEADER |
|
||||
* | ( size 20 Bytes ) |
|
||||
* | |
|
||||
* ---------------------------------
|
||||
* | |
|
||||
* | EEPROM TABLE RAS INFO |
|
||||
* | (available info size 4 Bytes) |
|
||||
* | ( reserved size 252 Bytes ) |
|
||||
* | |
|
||||
* ---------------------------------
|
||||
* | |
|
||||
* | BAD PAGE RECORD AREA |
|
||||
* | |
|
||||
* ---------------------------------
|
||||
*/
|
||||
|
||||
/* EEPROM Table V2_1 */
|
||||
#define RAS_TABLE_V2_1_INFO_SIZE 256
|
||||
#define RAS_TABLE_V2_1_INFO_START RAS_TABLE_HEADER_SIZE
|
||||
#define RAS_RECORD_START_V2_1 (RAS_HDR_START + RAS_TABLE_HEADER_SIZE + \
|
||||
RAS_TABLE_V2_1_INFO_SIZE)
|
||||
#define RAS_MAX_RECORD_COUNT_V2_1 ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE - \
|
||||
RAS_TABLE_V2_1_INFO_SIZE) \
|
||||
/ RAS_TABLE_RECORD_SIZE)
|
||||
|
||||
/* Given a zero-based index of an EEPROM RAS record, yields the EEPROM
|
||||
* offset off of RAS_TABLE_START. That is, this is something you can
|
||||
* add to control->i2c_address, and then tell I2C layer to read
|
||||
|
@ -103,6 +145,10 @@
|
|||
#define RAS_NUM_RECS(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
|
||||
RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE)
|
||||
|
||||
#define RAS_NUM_RECS_V2_1(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
|
||||
RAS_TABLE_HEADER_SIZE - \
|
||||
RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE)
|
||||
|
||||
#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev
|
||||
|
||||
static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
|
||||
|
@ -230,6 +276,69 @@ static int __write_table_header(struct amdgpu_ras_eeprom_control *control)
|
|||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
__encode_table_ras_info_to_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
|
||||
unsigned char *buf)
|
||||
{
|
||||
u32 *pp = (uint32_t *)buf;
|
||||
u32 tmp;
|
||||
|
||||
tmp = ((uint32_t)(rai->rma_status) & 0xFF) |
|
||||
(((uint32_t)(rai->health_percent) << 8) & 0xFF00) |
|
||||
(((uint32_t)(rai->ecc_page_threshold) << 16) & 0xFFFF0000);
|
||||
pp[0] = cpu_to_le32(tmp);
|
||||
}
|
||||
|
||||
static void
|
||||
__decode_table_ras_info_from_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
|
||||
unsigned char *buf)
|
||||
{
|
||||
u32 *pp = (uint32_t *)buf;
|
||||
u32 tmp;
|
||||
|
||||
tmp = le32_to_cpu(pp[0]);
|
||||
rai->rma_status = tmp & 0xFF;
|
||||
rai->health_percent = (tmp >> 8) & 0xFF;
|
||||
rai->ecc_page_threshold = (tmp >> 16) & 0xFFFF;
|
||||
}
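A worked example of the 32-bit packing used by __encode_table_ras_info_to_buf() and reversed by __decode_table_ras_info_from_buf() above; the ras-info values are made up.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t rma_status = 0;		/* GPU_HEALTH_USABLE */
	uint32_t health_percent = 100;		/* 0x64 */
	uint32_t ecc_page_threshold = 10;	/* made-up threshold */

	/* same packing as __encode_table_ras_info_to_buf() */
	uint32_t word = (rma_status & 0xFF) |
			((health_percent << 8) & 0xFF00) |
			((ecc_page_threshold << 16) & 0xFFFF0000);

	printf("packed = 0x%08x\n", word);	/* 0x000a6400 */

	/* and the reverse, as in __decode_table_ras_info_from_buf() */
	printf("status=%u health=%u%% threshold=%u\n",
	       word & 0xFF, (word >> 8) & 0xFF, (word >> 16) & 0xFFFF);
	return 0;
}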
|
||||
|
||||
static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control)
|
||||
{
|
||||
struct amdgpu_device *adev = to_amdgpu_device(control);
|
||||
u8 *buf;
|
||||
int res;
|
||||
|
||||
buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
|
||||
if (!buf) {
|
||||
DRM_ERROR("Failed to alloc buf to write table ras info\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
__encode_table_ras_info_to_buf(&control->tbl_rai, buf);
|
||||
|
||||
/* i2c may be unstable in gpu reset */
|
||||
down_read(&adev->reset_domain->sem);
|
||||
res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
|
||||
control->i2c_address +
|
||||
control->ras_info_offset,
|
||||
buf, RAS_TABLE_V2_1_INFO_SIZE);
|
||||
up_read(&adev->reset_domain->sem);
|
||||
|
||||
if (res < 0) {
|
||||
DRM_ERROR("Failed to write EEPROM table ras info:%d", res);
|
||||
} else if (res < RAS_TABLE_V2_1_INFO_SIZE) {
|
||||
DRM_ERROR("Short write:%d out of %d\n",
|
||||
res, RAS_TABLE_V2_1_INFO_SIZE);
|
||||
res = -EIO;
|
||||
} else {
|
||||
res = 0;
|
||||
}
|
||||
|
||||
kfree(buf);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
|
||||
{
|
||||
int ii;
|
||||
|
@ -246,6 +355,21 @@ static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
|
|||
return csum;
|
||||
}
|
||||
|
||||
static u8 __calc_ras_info_byte_sum(const struct amdgpu_ras_eeprom_control *control)
|
||||
{
|
||||
int ii;
|
||||
u8 *pp, csum;
|
||||
size_t sz;
|
||||
|
||||
sz = sizeof(control->tbl_rai);
|
||||
pp = (u8 *) &control->tbl_rai;
|
||||
csum = 0;
|
||||
for (ii = 0; ii < sz; ii++, pp++)
|
||||
csum += *pp;
|
||||
|
||||
return csum;
|
||||
}
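A small sketch of the byte-sum checksum scheme that __calc_hdr_byte_sum() and the new __calc_ras_info_byte_sum() feed into: the stored checksum is the negation of the byte sum, so summing the payload plus the checksum wraps to zero. The bytes below are made up.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t bytes[] = { 0x52, 0x44, 0x4d, 0x41, 0x00, 0x10, 0x02, 0x00 };
	uint8_t csum = 0;

	for (size_t i = 0; i < sizeof(bytes); i++)
		csum += bytes[i];

	/* the table stores -csum, mirroring "csum = -csum" in the update path,
	 * so a later verification pass that sums everything expects 0 */
	uint8_t stored = -csum;
	printf("sum=0x%02x stored=0x%02x total=0x%02x\n",
	       csum, stored, (uint8_t)(csum + stored));	/* total is 0x00 */
	return 0;
}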
|
||||
|
||||
static int amdgpu_ras_eeprom_correct_header_tag(
|
||||
struct amdgpu_ras_eeprom_control *control,
|
||||
uint32_t header)
|
||||
|
@ -282,6 +406,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
|
|||
{
|
||||
struct amdgpu_device *adev = to_amdgpu_device(control);
|
||||
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
|
||||
struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
u8 csum;
|
||||
int res;
|
||||
|
@ -289,14 +414,37 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
|
|||
mutex_lock(&control->ras_tbl_mutex);
|
||||
|
||||
hdr->header = RAS_TABLE_HDR_VAL;
|
||||
hdr->version = RAS_TABLE_VER;
|
||||
hdr->first_rec_offset = RAS_RECORD_START;
|
||||
hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
|
||||
if (adev->umc.ras &&
|
||||
adev->umc.ras->set_eeprom_table_version)
|
||||
adev->umc.ras->set_eeprom_table_version(hdr);
|
||||
else
|
||||
hdr->version = RAS_TABLE_VER_V1;
|
||||
|
||||
if (hdr->version == RAS_TABLE_VER_V2_1) {
|
||||
hdr->first_rec_offset = RAS_RECORD_START_V2_1;
|
||||
hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
|
||||
RAS_TABLE_V2_1_INFO_SIZE;
|
||||
rai->rma_status = GPU_HEALTH_USABLE;
|
||||
/**
|
||||
* GPU health represented as a percentage.
|
||||
* 0 means worst health, 100 means fully healthy.
|
||||
*/
|
||||
rai->health_percent = 100;
|
||||
/* ecc_page_threshold = 0 means disable bad page retirement */
|
||||
rai->ecc_page_threshold = con->bad_page_cnt_threshold;
|
||||
} else {
|
||||
hdr->first_rec_offset = RAS_RECORD_START;
|
||||
hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
|
||||
}
|
||||
|
||||
csum = __calc_hdr_byte_sum(control);
|
||||
if (hdr->version == RAS_TABLE_VER_V2_1)
|
||||
csum += __calc_ras_info_byte_sum(control);
|
||||
csum = -csum;
|
||||
hdr->checksum = csum;
|
||||
res = __write_table_header(control);
|
||||
if (!res && hdr->version > RAS_TABLE_VER_V1)
|
||||
res = __write_table_ras_info(control);
|
||||
|
||||
control->ras_num_recs = 0;
|
||||
control->ras_fri = 0;
|
||||
|
@ -573,11 +721,19 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
|
|||
"Saved bad pages %d reaches threshold value %d\n",
|
||||
control->ras_num_recs, ras->bad_page_cnt_threshold);
|
||||
control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
|
||||
if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) {
|
||||
control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
|
||||
control->tbl_rai.health_percent = 0;
|
||||
}
|
||||
}
|
||||
|
||||
control->tbl_hdr.version = RAS_TABLE_VER;
|
||||
control->tbl_hdr.first_rec_offset = RAS_INDEX_TO_OFFSET(control, control->ras_fri);
|
||||
control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
|
||||
if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
|
||||
control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
|
||||
RAS_TABLE_V2_1_INFO_SIZE +
|
||||
control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
|
||||
else
|
||||
control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
|
||||
control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
|
||||
control->tbl_hdr.checksum = 0;
|
||||
|
||||
buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
|
||||
|
@ -606,6 +762,17 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
|
|||
goto Out;
|
||||
}
|
||||
|
||||
/**
|
||||
* bad page records have been stored in eeprom,
|
||||
* now calculate gpu health percent
|
||||
*/
|
||||
if (amdgpu_bad_page_threshold != 0 &&
|
||||
control->tbl_hdr.version == RAS_TABLE_VER_V2_1 &&
|
||||
control->ras_num_recs < ras->bad_page_cnt_threshold)
|
||||
control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
|
||||
control->ras_num_recs) * 100) /
|
||||
ras->bad_page_cnt_threshold;
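A quick numeric check of the health_percent update just above, using made-up numbers for the threshold and the current record count.

#include <stdio.h>

int main(void)
{
	int bad_page_cnt_threshold = 10;	/* made-up retirement threshold */
	int ras_num_recs = 3;			/* made-up current bad-page count */

	/* same integer arithmetic as the health_percent update above */
	int health_percent = ((bad_page_cnt_threshold - ras_num_recs) * 100) /
			     bad_page_cnt_threshold;

	printf("health_percent = %d\n", health_percent);	/* 70 */
	return 0;
}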
|
||||
|
||||
/* Recalc the checksum.
|
||||
*/
|
||||
csum = 0;
|
||||
|
@ -613,10 +780,14 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
|
|||
csum += *pp;
|
||||
|
||||
csum += __calc_hdr_byte_sum(control);
|
||||
if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
|
||||
csum += __calc_ras_info_byte_sum(control);
|
||||
/* avoid sign extension when assigning to "checksum" */
|
||||
csum = -csum;
|
||||
control->tbl_hdr.checksum = csum;
|
||||
res = __write_table_header(control);
|
||||
if (!res && control->tbl_hdr.version > RAS_TABLE_VER_V1)
|
||||
res = __write_table_ras_info(control);
|
||||
Out:
|
||||
kfree(buf);
|
||||
return res;
|
||||
|
@ -807,9 +978,12 @@ Out:
|
|||
return res;
|
||||
}
|
||||
|
||||
uint32_t amdgpu_ras_eeprom_max_record_count(void)
|
||||
uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control)
|
||||
{
|
||||
return RAS_MAX_RECORD_COUNT;
|
||||
if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
|
||||
return RAS_MAX_RECORD_COUNT_V2_1;
|
||||
else
|
||||
return RAS_MAX_RECORD_COUNT;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
|
@ -1051,8 +1225,14 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control
|
|||
int buf_size, res;
|
||||
u8 csum, *buf, *pp;
|
||||
|
||||
buf_size = RAS_TABLE_HEADER_SIZE +
|
||||
control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
|
||||
if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
|
||||
buf_size = RAS_TABLE_HEADER_SIZE +
|
||||
RAS_TABLE_V2_1_INFO_SIZE +
|
||||
control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
|
||||
else
|
||||
buf_size = RAS_TABLE_HEADER_SIZE +
|
||||
control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
|
||||
|
||||
buf = kzalloc(buf_size, GFP_KERNEL);
|
||||
if (!buf) {
|
||||
DRM_ERROR("Out of memory checking RAS table checksum.\n");
|
||||
|
@ -1080,6 +1260,39 @@ Out:
|
|||
return res < 0 ? res : csum;
|
||||
}
|
||||
|
||||
static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control)
|
||||
{
|
||||
struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
|
||||
struct amdgpu_device *adev = to_amdgpu_device(control);
|
||||
unsigned char *buf;
|
||||
int res;
|
||||
|
||||
buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
|
||||
if (!buf) {
|
||||
DRM_ERROR("Failed to alloc buf to read EEPROM table ras info\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/**
|
||||
* EEPROM table V2_1 supports ras info,
|
||||
* read EEPROM table ras info
|
||||
*/
|
||||
res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
|
||||
control->i2c_address + control->ras_info_offset,
|
||||
buf, RAS_TABLE_V2_1_INFO_SIZE);
|
||||
if (res < RAS_TABLE_V2_1_INFO_SIZE) {
|
||||
DRM_ERROR("Failed to read EEPROM table ras info, res:%d", res);
|
||||
res = res >= 0 ? -EIO : res;
|
||||
goto Out;
|
||||
}
|
||||
|
||||
__decode_table_ras_info_from_buf(rai, buf);
|
||||
|
||||
Out:
|
||||
kfree(buf);
|
||||
return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res;
|
||||
}
|
||||
|
||||
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
|
||||
bool *exceed_err_limit)
|
||||
{
|
||||
|
@ -1102,8 +1315,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
|
|||
return -EINVAL;
|
||||
|
||||
control->ras_header_offset = RAS_HDR_START;
|
||||
control->ras_record_offset = RAS_RECORD_START;
|
||||
control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
|
||||
control->ras_info_offset = RAS_TABLE_V2_1_INFO_START;
|
||||
mutex_init(&control->ras_tbl_mutex);
|
||||
|
||||
/* Read the table header from EEPROM address */
|
||||
|
@ -1117,12 +1329,27 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
|
|||
|
||||
__decode_table_header_from_buf(hdr, buf);
|
||||
|
||||
control->ras_num_recs = RAS_NUM_RECS(hdr);
|
||||
if (hdr->version == RAS_TABLE_VER_V2_1) {
|
||||
control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
|
||||
control->ras_record_offset = RAS_RECORD_START_V2_1;
|
||||
control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
|
||||
} else {
|
||||
control->ras_num_recs = RAS_NUM_RECS(hdr);
|
||||
control->ras_record_offset = RAS_RECORD_START;
|
||||
control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
|
||||
}
|
||||
control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
|
||||
|
||||
if (hdr->header == RAS_TABLE_HDR_VAL) {
|
||||
DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
|
||||
control->ras_num_recs);
|
||||
|
||||
if (hdr->version == RAS_TABLE_VER_V2_1) {
|
||||
res = __read_table_ras_info(control);
|
||||
if (res)
|
||||
return res;
|
||||
}
|
||||
|
||||
res = __verify_ras_table_checksum(control);
|
||||
if (res)
|
||||
DRM_ERROR("RAS table incorrect checksum or error:%d\n",
|
||||
|
@ -1136,6 +1363,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
|
|||
ras->bad_page_cnt_threshold);
|
||||
} else if (hdr->header == RAS_TABLE_HDR_BAD &&
|
||||
amdgpu_bad_page_threshold != 0) {
|
||||
if (hdr->version == RAS_TABLE_VER_V2_1) {
|
||||
res = __read_table_ras_info(control);
|
||||
if (res)
|
||||
return res;
|
||||
}
|
||||
|
||||
res = __verify_ras_table_checksum(control);
|
||||
if (res)
|
||||
DRM_ERROR("RAS Table incorrect checksum or error:%d\n",
|
||||
|
|
|
@ -26,8 +26,16 @@
|
|||
|
||||
#include <linux/i2c.h>
|
||||
|
||||
#define RAS_TABLE_VER_V1 0x00010000
|
||||
#define RAS_TABLE_VER_V2_1 0x00021000
|
||||
|
||||
struct amdgpu_device;
|
||||
|
||||
enum amdgpu_ras_gpu_health_status {
|
||||
GPU_HEALTH_USABLE = 0,
|
||||
GPU_RETIRED__ECC_REACH_THRESHOLD = 2,
|
||||
};
|
||||
|
||||
enum amdgpu_ras_eeprom_err_type {
|
||||
AMDGPU_RAS_EEPROM_ERR_NA,
|
||||
AMDGPU_RAS_EEPROM_ERR_RECOVERABLE,
|
||||
|
@ -43,9 +51,18 @@ struct amdgpu_ras_eeprom_table_header {
|
|||
uint32_t checksum;
|
||||
} __packed;
|
||||
|
||||
struct amdgpu_ras_eeprom_table_ras_info {
|
||||
u8 rma_status;
|
||||
u8 health_percent;
|
||||
u16 ecc_page_threshold;
|
||||
u32 padding[64 - 1];
|
||||
} __packed;
|
||||
|
||||
struct amdgpu_ras_eeprom_control {
|
||||
struct amdgpu_ras_eeprom_table_header tbl_hdr;
|
||||
|
||||
struct amdgpu_ras_eeprom_table_ras_info tbl_rai;
|
||||
|
||||
/* Base I2C EEPROM 19-bit memory address,
|
||||
* where the table is located. For more information,
|
||||
* see top of amdgpu_eeprom.c.
|
||||
|
@ -58,6 +75,7 @@ struct amdgpu_ras_eeprom_control {
|
|||
* right after the header.
|
||||
*/
|
||||
u32 ras_header_offset;
|
||||
u32 ras_info_offset;
|
||||
u32 ras_record_offset;
|
||||
|
||||
/* Number of records in the table.
|
||||
|
@ -124,7 +142,7 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
|
|||
int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
|
||||
struct eeprom_table_record *records, const u32 num);
|
||||
|
||||
uint32_t amdgpu_ras_eeprom_max_record_count(void);
|
||||
uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control);
|
||||
|
||||
void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
|
|||
|
||||
switch (adev->ip_versions[MP1_HWIP][0]) {
|
||||
case IP_VERSION(13, 0, 2):
|
||||
case IP_VERSION(13, 0, 6):
|
||||
ret = aldebaran_reset_init(adev);
|
||||
break;
|
||||
case IP_VERSION(11, 0, 7):
|
||||
|
@ -61,6 +62,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)
|
|||
|
||||
switch (adev->ip_versions[MP1_HWIP][0]) {
|
||||
case IP_VERSION(13, 0, 2):
|
||||
case IP_VERSION(13, 0, 6):
|
||||
ret = aldebaran_reset_fini(adev);
|
||||
break;
|
||||
case IP_VERSION(11, 0, 7):
|
||||
|
|
|
@ -49,6 +49,26 @@
|
|||
* them until the pointers are equal again.
|
||||
*/
|
||||
|
||||
/**
* amdgpu_ring_max_ibs - Return max IBs that fit in a single submission.
*
* @type: ring type for which to return the limit.
*/
unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type)
{
switch (type) {
case AMDGPU_RING_TYPE_GFX:
/* Need to keep at least 192 on GFX7+ for old radv. */
return 192;
case AMDGPU_RING_TYPE_COMPUTE:
return 125;
case AMDGPU_RING_TYPE_VCN_JPEG:
return 16;
default:
return 49;
}
}

/**
|
||||
* amdgpu_ring_alloc - allocate space on the ring buffer
|
||||
*
|
||||
|
@ -58,7 +78,7 @@
|
|||
* Allocate @ndw dwords in the ring buffer (all asics).
|
||||
* Returns 0 on success, error on failure.
|
||||
*/
|
||||
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
|
||||
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
|
||||
{
|
||||
/* Align requested size with padding so unlock_commit can
|
||||
* pad safely */
|
||||
|
@ -182,6 +202,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
|
|||
int sched_hw_submission = amdgpu_sched_hw_submission;
|
||||
u32 *num_sched;
|
||||
u32 hw_ip;
|
||||
unsigned int max_ibs_dw;
|
||||
|
||||
/* Set the hw submission limit higher for KIQ because
|
||||
* it's used for a number of gfx/compute tasks by both
|
||||
|
@ -290,6 +311,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
|
|||
return r;
|
||||
}
|
||||
|
||||
max_ibs_dw = ring->funcs->emit_frame_size +
|
||||
amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
|
||||
max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
|
||||
|
||||
if (WARN_ON(max_ibs_dw > max_dw))
|
||||
max_dw = max_ibs_dw;
|
||||
|
||||
ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
|
||||
|
||||
ring->buf_mask = (ring->ring_size / 4) - 1;
|
||||
|
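
To make the sizing above concrete, here is a rough sketch with assumed per-ring parameters; emit_frame_size, emit_ib_size, align_mask, the caller's max_dw and the helper name are illustrative, not values from this patch:

/* Sketch only: shows how the per-type IB limit feeds the ring size. */
static unsigned int example_gfx_ring_bytes(unsigned int sched_hw_submission)
{
	unsigned int emit_frame_size = 64;	/* assumed */
	unsigned int emit_ib_size = 4;		/* assumed */
	unsigned int align_mask = 0xff;		/* assumed */
	unsigned int max_dw = 512;		/* assumed caller request */
	unsigned int max_ibs_dw;

	max_ibs_dw = emit_frame_size +
		     amdgpu_ring_max_ibs(AMDGPU_RING_TYPE_GFX) * emit_ib_size;
	max_ibs_dw = (max_ibs_dw + align_mask) & ~align_mask;	/* 832 -> 1024 */
	if (max_ibs_dw > max_dw)
		max_dw = max_ibs_dw;				/* raised to 1024 */

	/* dwords * 4 bytes * hw submissions, rounded up to a power of two */
	return roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
}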
@ -361,6 +389,8 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
|
|||
amdgpu_bo_free_kernel(&ring->ring_obj,
|
||||
&ring->gpu_addr,
|
||||
(void **)&ring->ring);
|
||||
} else {
|
||||
kfree(ring->fence_drv.fences);
|
||||
}
|
||||
|
||||
dma_fence_put(ring->vmid_wait);
|
||||
|
@ -478,6 +508,59 @@ static const struct file_operations amdgpu_debugfs_ring_fops = {
|
|||
.llseek = default_llseek
|
||||
};
|
||||
|
||||
static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
|
||||
size_t size, loff_t *pos)
|
||||
{
|
||||
struct amdgpu_ring *ring = file_inode(f)->i_private;
|
||||
volatile u32 *mqd;
|
||||
int r;
|
||||
uint32_t value, result;
|
||||
|
||||
if (*pos & 3 || size & 3)
|
||||
return -EINVAL;
|
||||
|
||||
result = 0;
|
||||
|
||||
r = amdgpu_bo_reserve(ring->mqd_obj, false);
|
||||
if (unlikely(r != 0))
|
||||
return r;
|
||||
|
||||
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
|
||||
if (r) {
|
||||
amdgpu_bo_unreserve(ring->mqd_obj);
|
||||
return r;
|
||||
}
|
||||
|
||||
while (size) {
|
||||
if (*pos >= ring->mqd_size)
|
||||
goto done;
|
||||
|
||||
value = mqd[*pos/4];
|
||||
r = put_user(value, (uint32_t *)buf);
|
||||
if (r)
|
||||
goto done;
|
||||
buf += 4;
|
||||
result += 4;
|
||||
size -= 4;
|
||||
*pos += 4;
|
||||
}
|
||||
|
||||
done:
|
||||
amdgpu_bo_kunmap(ring->mqd_obj);
|
||||
mqd = NULL;
|
||||
amdgpu_bo_unreserve(ring->mqd_obj);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static const struct file_operations amdgpu_debugfs_mqd_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.read = amdgpu_debugfs_mqd_read,
|
||||
.llseek = default_llseek
|
||||
};
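
For reference, a minimal userspace sketch of consuming the new dump; the debugfs path is an assumption based on the usual DRM layout (card 0) and the amdgpu_mqd_%s file name created in amdgpu_debugfs_ring_init() below:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static int dump_mqd(const char *ring_name)
{
	char path[128];
	uint32_t dw;
	int fd;

	snprintf(path, sizeof(path),
		 "/sys/kernel/debug/dri/0/amdgpu_mqd_%s", ring_name);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	/* the kernel side only accepts 4-byte aligned reads */
	while (read(fd, &dw, sizeof(dw)) == (ssize_t)sizeof(dw))
		printf("%08x\n", dw);
	close(fd);
	return 0;
}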
|
||||
|
||||
#endif
|
||||
|
||||
void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
|
||||
|
@ -489,10 +572,16 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
|
|||
char name[32];
|
||||
|
||||
sprintf(name, "amdgpu_ring_%s", ring->name);
|
||||
debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, ring,
|
||||
debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
|
||||
&amdgpu_debugfs_ring_fops,
|
||||
ring->ring_size + 12);
|
||||
|
||||
if (ring->mqd_obj) {
|
||||
sprintf(name, "amdgpu_mqd_%s", ring->name);
|
||||
debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
|
||||
&amdgpu_debugfs_mqd_fops,
|
||||
ring->mqd_size);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -581,3 +670,21 @@ void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
|
|||
if (ring->is_sw_ring)
|
||||
amdgpu_sw_ring_ib_end(ring);
|
||||
}
|
||||
|
||||
void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring)
|
||||
{
|
||||
if (ring->is_sw_ring)
|
||||
amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CONTROL);
|
||||
}
|
||||
|
||||
void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring)
|
||||
{
|
||||
if (ring->is_sw_ring)
|
||||
amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CE);
|
||||
}
|
||||
|
||||
void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
|
||||
{
|
||||
if (ring->is_sw_ring)
|
||||
amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
|
||||
}
|
||||
|
|
|
@ -37,8 +37,8 @@ struct amdgpu_job;
|
|||
struct amdgpu_vm;
|
||||
|
||||
/* max number of rings */
|
||||
#define AMDGPU_MAX_RINGS 28
|
||||
#define AMDGPU_MAX_HWIP_RINGS 8
|
||||
#define AMDGPU_MAX_RINGS 124
|
||||
#define AMDGPU_MAX_HWIP_RINGS 64
|
||||
#define AMDGPU_MAX_GFX_RINGS 2
|
||||
#define AMDGPU_MAX_SW_GFX_RINGS 2
|
||||
#define AMDGPU_MAX_COMPUTE_RINGS 8
|
||||
|
@ -212,6 +212,8 @@ struct amdgpu_ring_funcs {
|
|||
void (*end_use)(struct amdgpu_ring *ring);
|
||||
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
|
||||
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
|
||||
void (*emit_gfx_shadow)(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va,
|
||||
u64 gds_va, bool init_shadow, int vmid);
|
||||
void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
|
||||
uint32_t reg_val_offs);
|
||||
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
|
||||
|
@ -227,6 +229,9 @@ struct amdgpu_ring_funcs {
|
|||
int (*preempt_ib)(struct amdgpu_ring *ring);
|
||||
void (*emit_mem_sync)(struct amdgpu_ring *ring);
|
||||
void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
|
||||
void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset);
|
||||
void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset);
|
||||
void (*patch_de)(struct amdgpu_ring *ring, unsigned offset);
|
||||
};
|
||||
|
||||
struct amdgpu_ring {
|
||||
|
@ -250,12 +255,14 @@ struct amdgpu_ring {
|
|||
uint32_t buf_mask;
|
||||
u32 idx;
|
||||
u32 xcc_id;
|
||||
u32 xcp_id;
|
||||
u32 me;
|
||||
u32 pipe;
|
||||
u32 queue;
|
||||
struct amdgpu_bo *mqd_obj;
|
||||
uint64_t mqd_gpu_addr;
|
||||
void *mqd_ptr;
|
||||
unsigned mqd_size;
|
||||
uint64_t eop_gpu_addr;
|
||||
u32 doorbell_index;
|
||||
bool use_doorbell;
|
||||
|
@ -309,6 +316,7 @@ struct amdgpu_ring {
|
|||
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
|
||||
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
|
||||
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
|
||||
#define amdgpu_ring_emit_gfx_shadow(r, s, c, g, i, v) ((r)->funcs->emit_gfx_shadow((r), (s), (c), (g), (i), (v)))
|
||||
#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
|
||||
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
|
||||
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
|
||||
|
@ -318,10 +326,17 @@ struct amdgpu_ring {
|
|||
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
|
||||
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
|
||||
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
|
||||
#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
|
||||
#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
|
||||
#define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o)))
|
||||
|
||||
unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type);
|
||||
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
|
||||
void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
|
||||
void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
|
||||
void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring);
|
||||
void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring);
|
||||
void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring);
|
||||
|
||||
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
|
||||
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
|
||||
|
|
|
@ -105,6 +105,16 @@ static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
|
|||
amdgpu_fence_update_start_timestamp(e->ring,
|
||||
chunk->sync_seq,
|
||||
ktime_get());
|
||||
if (chunk->sync_seq ==
|
||||
le32_to_cpu(*(e->ring->fence_drv.cpu_addr + 2))) {
|
||||
if (chunk->cntl_offset <= e->ring->buf_mask)
|
||||
amdgpu_ring_patch_cntl(e->ring,
|
||||
chunk->cntl_offset);
|
||||
if (chunk->ce_offset <= e->ring->buf_mask)
|
||||
amdgpu_ring_patch_ce(e->ring, chunk->ce_offset);
|
||||
if (chunk->de_offset <= e->ring->buf_mask)
|
||||
amdgpu_ring_patch_de(e->ring, chunk->de_offset);
|
||||
}
|
||||
amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring,
|
||||
chunk->start,
|
||||
chunk->end);
|
||||
|
@ -407,6 +417,17 @@ void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
|
|||
amdgpu_ring_mux_end_ib(mux, ring);
|
||||
}
|
||||
|
||||
void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
|
||||
unsigned offset;
|
||||
|
||||
offset = ring->wptr & ring->buf_mask;
|
||||
|
||||
amdgpu_ring_mux_ib_mark_offset(mux, ring, offset, type);
|
||||
}
|
||||
|
||||
void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
|
||||
{
|
||||
struct amdgpu_mux_entry *e;
|
||||
|
@ -429,6 +450,10 @@ void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *r
|
|||
}
|
||||
|
||||
chunk->start = ring->wptr;
|
||||
/* the initialized values are used to check whether they were set by the IB submission */
|
||||
chunk->cntl_offset = ring->buf_mask + 1;
|
||||
chunk->de_offset = ring->buf_mask + 1;
|
||||
chunk->ce_offset = ring->buf_mask + 1;
|
||||
list_add_tail(&chunk->entry, &e->list);
|
||||
}
|
||||
|
||||
|
@ -454,6 +479,41 @@ static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct a
|
|||
}
|
||||
}
|
||||
|
||||
void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux,
|
||||
struct amdgpu_ring *ring, u64 offset,
|
||||
enum amdgpu_ring_mux_offset_type type)
|
||||
{
|
||||
struct amdgpu_mux_entry *e;
|
||||
struct amdgpu_mux_chunk *chunk;
|
||||
|
||||
e = amdgpu_ring_mux_sw_entry(mux, ring);
|
||||
if (!e) {
|
||||
DRM_ERROR("cannot find entry!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
|
||||
if (!chunk) {
|
||||
DRM_ERROR("cannot find chunk!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case AMDGPU_MUX_OFFSET_TYPE_CONTROL:
|
||||
chunk->cntl_offset = offset;
|
||||
break;
|
||||
case AMDGPU_MUX_OFFSET_TYPE_DE:
|
||||
chunk->de_offset = offset;
|
||||
break;
|
||||
case AMDGPU_MUX_OFFSET_TYPE_CE:
|
||||
chunk->ce_offset = offset;
|
||||
break;
|
||||
default:
|
||||
DRM_ERROR("invalid type (%d)\n", type);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
|
||||
{
|
||||
struct amdgpu_mux_entry *e;
|
||||
|
|
|
@ -50,6 +50,12 @@ struct amdgpu_mux_entry {
|
|||
struct list_head list;
|
||||
};
|
||||
|
||||
enum amdgpu_ring_mux_offset_type {
|
||||
AMDGPU_MUX_OFFSET_TYPE_CONTROL,
|
||||
AMDGPU_MUX_OFFSET_TYPE_DE,
|
||||
AMDGPU_MUX_OFFSET_TYPE_CE,
|
||||
};
|
||||
|
||||
struct amdgpu_ring_mux {
|
||||
struct amdgpu_ring *real_ring;
|
||||
|
||||
|
@ -72,12 +78,18 @@ struct amdgpu_ring_mux {
|
|||
* @sync_seq: the fence seqno related with the saved IB.
|
||||
* @start:- start location on the software ring.
|
||||
* @end:- end location on the software ring.
|
||||
* @cntl_offset:- the PRE_RESUME bit position used for resubmission.
|
||||
* @de_offset:- the anchor in write_data for de meta of resubmission.
|
||||
* @ce_offset:- the anchor in write_data for ce meta of resubmission.
|
||||
*/
|
||||
struct amdgpu_mux_chunk {
|
||||
struct list_head entry;
|
||||
uint32_t sync_seq;
|
||||
u64 start;
|
||||
u64 end;
|
||||
u64 cntl_offset;
|
||||
u64 de_offset;
|
||||
u64 ce_offset;
|
||||
};
|
||||
|
||||
int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
|
||||
|
@ -89,6 +101,8 @@ u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ri
|
|||
u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
|
||||
void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
|
||||
void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
|
||||
void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
|
||||
u64 offset, enum amdgpu_ring_mux_offset_type type);
|
||||
bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux);
|
||||
|
||||
u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
|
||||
|
@ -97,6 +111,7 @@ void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
|
|||
void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
|
||||
void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
|
||||
void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
|
||||
void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type);
|
||||
const char *amdgpu_sw_ring_name(int idx);
|
||||
unsigned int amdgpu_sw_ring_priority(int idx);
|
||||
|
||||
|
|
|
@ -31,12 +31,13 @@
|
|||
* amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @xcc_id: xcc accelerated compute core id
|
||||
*
|
||||
* Put the RLC into safe mode if the RLC is enabled and not already in safe mode.
|
||||
*/
|
||||
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
|
||||
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id)
|
||||
{
|
||||
if (adev->gfx.rlc.in_safe_mode)
|
||||
if (adev->gfx.rlc.in_safe_mode[xcc_id])
|
||||
return;
|
||||
|
||||
/* if RLC is not enabled, do nothing */
|
||||
|
@ -46,8 +47,8 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
|
|||
if (adev->cg_flags &
|
||||
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
|
||||
adev->gfx.rlc.funcs->set_safe_mode(adev);
|
||||
adev->gfx.rlc.in_safe_mode = true;
|
||||
adev->gfx.rlc.funcs->set_safe_mode(adev, xcc_id);
|
||||
adev->gfx.rlc.in_safe_mode[xcc_id] = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -55,12 +56,13 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
|
|||
* amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @xcc_id: xcc accelerated compute core id
|
||||
*
|
||||
* Take the RLC out of safe mode if the RLC is enabled and has entered safe mode.
|
||||
*/
|
||||
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
|
||||
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id)
|
||||
{
|
||||
if (!(adev->gfx.rlc.in_safe_mode))
|
||||
if (!(adev->gfx.rlc.in_safe_mode[xcc_id]))
|
||||
return;
|
||||
|
||||
/* if RLC is not enabled, do nothing */
|
||||
|
@ -70,8 +72,8 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
|
|||
if (adev->cg_flags &
|
||||
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
|
||||
adev->gfx.rlc.funcs->unset_safe_mode(adev);
|
||||
adev->gfx.rlc.in_safe_mode = false;
|
||||
adev->gfx.rlc.funcs->unset_safe_mode(adev, xcc_id);
|
||||
adev->gfx.rlc.in_safe_mode[xcc_id] = false;
|
||||
}
|
||||
}
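
A hypothetical caller, sketched only to show the intended pattern with the new per-XCC bookkeeping (the function name is made up, not part of the patch):

static void example_update_cg_per_xcc(struct amdgpu_device *adev, int xcc_id)
{
	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
	/* reprogram CG/PG state for this XCC only */
	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
}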
|
||||
|
||||
|
|
|
@ -157,8 +157,8 @@ typedef struct _RLC_TABLE_OF_CONTENT {
|
|||
|
||||
struct amdgpu_rlc_funcs {
|
||||
bool (*is_rlc_enabled)(struct amdgpu_device *adev);
|
||||
void (*set_safe_mode)(struct amdgpu_device *adev);
|
||||
void (*unset_safe_mode)(struct amdgpu_device *adev);
|
||||
void (*set_safe_mode)(struct amdgpu_device *adev, int xcc_id);
|
||||
void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id);
|
||||
int (*init)(struct amdgpu_device *adev);
|
||||
u32 (*get_csb_size)(struct amdgpu_device *adev);
|
||||
void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
|
||||
|
@ -201,7 +201,7 @@ struct amdgpu_rlc {
|
|||
u32 cp_table_size;
|
||||
|
||||
/* safe mode for updating CG/PG state */
|
||||
bool in_safe_mode;
|
||||
bool in_safe_mode[8];
|
||||
const struct amdgpu_rlc_funcs *funcs;
|
||||
|
||||
/* for firmware data */
|
||||
|
@ -260,8 +260,8 @@ struct amdgpu_rlc {
|
|||
struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl;
|
||||
};
|
||||
|
||||
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id);
|
||||
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id);
|
||||
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws);
|
||||
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
|
||||
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);
|
||||
|
|
|
@ -64,7 +64,7 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
|
|||
}
|
||||
|
||||
uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
|
||||
unsigned vmid)
|
||||
unsigned int vmid)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint64_t csa_mc_addr;
|
||||
|
@ -252,6 +252,13 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
|
|||
if (!duplicate && (instance != i))
|
||||
continue;
|
||||
else {
|
||||
/* Use a single copy per SDMA firmware type. PSP uses the same instance for all
|
||||
* groups of SDMAs */
|
||||
if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2) &&
|
||||
adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
|
||||
adev->sdma.num_inst_per_aid == i) {
|
||||
break;
|
||||
}
|
||||
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
|
||||
info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
|
||||
info->fw = adev->sdma.instance[i].fw;
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
#include "amdgpu_ras.h"
|
||||
|
||||
/* max number of IP instances */
|
||||
#define AMDGPU_MAX_SDMA_INSTANCES 8
|
||||
#define AMDGPU_MAX_SDMA_INSTANCES 16
|
||||
|
||||
enum amdgpu_sdma_irq {
|
||||
AMDGPU_SDMA_IRQ_INSTANCE0 = 0,
|
||||
|
@ -37,9 +37,19 @@ enum amdgpu_sdma_irq {
|
|||
AMDGPU_SDMA_IRQ_INSTANCE5,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE6,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE7,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE8,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE9,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE10,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE11,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE12,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE13,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE14,
|
||||
AMDGPU_SDMA_IRQ_INSTANCE15,
|
||||
AMDGPU_SDMA_IRQ_LAST
|
||||
};
|
||||
|
||||
#define NUM_SDMA(x) hweight32(x)
|
||||
|
||||
struct amdgpu_sdma_instance {
|
||||
/* SDMA firmware */
|
||||
const struct firmware *fw;
|
||||
|
@ -49,6 +59,35 @@ struct amdgpu_sdma_instance {
|
|||
struct amdgpu_ring ring;
|
||||
struct amdgpu_ring page;
|
||||
bool burst_nop;
|
||||
uint32_t aid_id;
|
||||
};
|
||||
|
||||
enum amdgpu_sdma_ras_memory_id {
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF0 = 1,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF1 = 2,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF2 = 3,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF3 = 4,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF4 = 5,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF5 = 6,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF6 = 7,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF7 = 8,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF8 = 9,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF9 = 10,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF10 = 11,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF11 = 12,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF12 = 13,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF13 = 14,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF14 = 15,
|
||||
AMDGPU_SDMA_MBANK_DATA_BUF15 = 16,
|
||||
AMDGPU_SDMA_UCODE_BUF = 17,
|
||||
AMDGPU_SDMA_RB_CMD_BUF = 18,
|
||||
AMDGPU_SDMA_IB_CMD_BUF = 19,
|
||||
AMDGPU_SDMA_UTCL1_RD_FIFO = 20,
|
||||
AMDGPU_SDMA_UTCL1_RDBST_FIFO = 21,
|
||||
AMDGPU_SDMA_UTCL1_WR_FIFO = 22,
|
||||
AMDGPU_SDMA_DATA_LUT_FIFO = 23,
|
||||
AMDGPU_SDMA_SPLIT_DAT_BUF = 24,
|
||||
AMDGPU_SDMA_MEMORY_BLOCK_LAST,
|
||||
};
|
||||
|
||||
struct amdgpu_sdma_ras {
|
||||
|
@ -66,6 +105,8 @@ struct amdgpu_sdma {
|
|||
struct amdgpu_irq_src srbm_write_irq;
|
||||
|
||||
int num_instances;
|
||||
uint32_t sdma_mask;
|
||||
int num_inst_per_aid;
|
||||
uint32_t srbm_soft_reset;
|
||||
bool has_page_queue;
|
||||
struct ras_common_if *ras_if;
|
||||
|
|
|
@ -30,6 +30,7 @@ struct amdgpu_smuio_funcs {
|
|||
void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
|
||||
u32 (*get_die_id)(struct amdgpu_device *adev);
|
||||
u32 (*get_socket_id)(struct amdgpu_device *adev);
|
||||
enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev);
|
||||
bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
|
|
|
@ -38,7 +38,6 @@
|
|||
#include <linux/seq_file.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/swiotlb.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/module.h>
|
||||
|
@ -65,7 +64,7 @@
|
|||
|
||||
MODULE_IMPORT_NS(DMA_BUF);
|
||||
|
||||
#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128
|
||||
#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128)
|
||||
|
||||
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
|
||||
struct ttm_tt *ttm,
|
||||
|
@ -184,11 +183,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
|
|||
static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
|
||||
struct ttm_resource *mem,
|
||||
struct amdgpu_res_cursor *mm_cur,
|
||||
unsigned window, struct amdgpu_ring *ring,
|
||||
unsigned int window, struct amdgpu_ring *ring,
|
||||
bool tmz, uint64_t *size, uint64_t *addr)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
unsigned offset, num_pages, num_dw, num_bytes;
|
||||
unsigned int offset, num_pages, num_dw, num_bytes;
|
||||
uint64_t src_addr, dst_addr;
|
||||
struct amdgpu_job *job;
|
||||
void *cpu_addr;
|
||||
|
@ -384,7 +383,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
|
|||
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
|
||||
struct dma_fence *wipe_fence = NULL;
|
||||
|
||||
r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence);
|
||||
r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
|
||||
false);
|
||||
if (r) {
|
||||
goto error;
|
||||
} else if (wipe_fence) {
|
||||
|
@ -631,6 +631,7 @@ struct amdgpu_ttm_tt {
|
|||
struct task_struct *usertask;
|
||||
uint32_t userflags;
|
||||
bool bound;
|
||||
int32_t pool_id;
|
||||
};
|
||||
|
||||
#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm)
|
||||
|
@ -800,6 +801,44 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
|
|||
sg_free_table(ttm->sg);
|
||||
}
|
||||
|
||||
/*
* total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
* MQDn+CtrlStackn, where n is the number of XCCs per partition.
* pages_per_xcc is the size of one MQD+CtrlStack. The first page is the MQD
* and keeps the default memory type, UC; the remaining pages_per_xcc - 1
* pages are the control stack and have their memory type changed to NC.
*/
static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
|
||||
struct ttm_tt *ttm, uint64_t flags)
|
||||
{
|
||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||
uint64_t total_pages = ttm->num_pages;
|
||||
int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
|
||||
uint64_t page_idx, pages_per_xcc;
|
||||
int i;
|
||||
uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
|
||||
AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
|
||||
|
||||
pages_per_xcc = total_pages;
|
||||
do_div(pages_per_xcc, num_xcc);
|
||||
|
||||
for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
|
||||
/* MQD page: use default flags */
|
||||
amdgpu_gart_bind(adev,
|
||||
gtt->offset + (page_idx << PAGE_SHIFT),
|
||||
1, >t->ttm.dma_address[page_idx], flags);
|
||||
/*
|
||||
* Ctrl pages - modify the memory type to NC (ctrl_flags) from
|
||||
* the second page of the BO onward.
|
||||
*/
|
||||
amdgpu_gart_bind(adev,
|
||||
gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
|
||||
pages_per_xcc - 1,
|
||||
>t->ttm.dma_address[page_idx + 1],
|
||||
ctrl_flags);
|
||||
}
|
||||
}
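
A worked example of the split above, with assumed numbers (not taken from the patch):

/*
 * Assume num_xcc_per_xcp = 4 and a 32-page MQD BO: pages_per_xcc = 8.
 * Pages 0, 8, 16 and 24 are the MQDs and keep the default UC mapping;
 * the 7 pages following each of them are control stack and are bound
 * with AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC) instead.
 */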
|
||||
|
||||
static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
|
||||
struct ttm_buffer_object *tbo,
|
||||
uint64_t flags)
|
||||
|
@ -812,21 +851,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
|
|||
flags |= AMDGPU_PTE_TMZ;
|
||||
|
||||
if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
|
||||
uint64_t page_idx = 1;
|
||||
|
||||
amdgpu_gart_bind(adev, gtt->offset, page_idx,
|
||||
gtt->ttm.dma_address, flags);
|
||||
|
||||
/* The memory type of the first page defaults to UC. Now
|
||||
* modify the memory type to NC from the second page of
|
||||
* the BO onward.
|
||||
*/
|
||||
flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
|
||||
flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
|
||||
|
||||
amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT),
|
||||
ttm->num_pages - page_idx,
|
||||
&(gtt->ttm.dma_address[page_idx]), flags);
|
||||
amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
|
||||
} else {
|
||||
amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
|
||||
gtt->ttm.dma_address, flags);
|
||||
|
@ -1029,15 +1054,20 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
|
|||
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
|
||||
uint32_t page_flags)
|
||||
{
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
|
||||
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
|
||||
struct amdgpu_ttm_tt *gtt;
|
||||
enum ttm_caching caching;
|
||||
|
||||
gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
|
||||
if (gtt == NULL) {
|
||||
if (!gtt)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gtt->gobj = &bo->base;
|
||||
if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
|
||||
gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
|
||||
else
|
||||
gtt->pool_id = abo->xcp_id;
|
||||
|
||||
if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
|
||||
caching = ttm_write_combined;
|
||||
|
@ -1064,6 +1094,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
|
|||
{
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
|
||||
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
|
||||
struct ttm_pool *pool;
|
||||
pgoff_t i;
|
||||
int ret;
|
||||
|
||||
|
@ -1078,7 +1109,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
|
|||
if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
|
||||
return 0;
|
||||
|
||||
ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
|
||||
if (adev->mman.ttm_pools && gtt->pool_id >= 0)
|
||||
pool = &adev->mman.ttm_pools[gtt->pool_id];
|
||||
else
|
||||
pool = &adev->mman.bdev.pool;
|
||||
ret = ttm_pool_alloc(pool, ttm, ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -1099,6 +1134,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
|
|||
{
|
||||
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
|
||||
struct amdgpu_device *adev;
|
||||
struct ttm_pool *pool;
|
||||
pgoff_t i;
|
||||
|
||||
amdgpu_ttm_backend_unbind(bdev, ttm);
|
||||
|
@ -1117,7 +1153,13 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
|
|||
ttm->pages[i]->mapping = NULL;
|
||||
|
||||
adev = amdgpu_ttm_adev(bdev);
|
||||
return ttm_pool_free(&adev->mman.bdev.pool, ttm);
|
||||
|
||||
if (adev->mman.ttm_pools && gtt->pool_id >= 0)
|
||||
pool = &adev->mman.ttm_pools[gtt->pool_id];
|
||||
else
|
||||
pool = &adev->mman.bdev.pool;
|
||||
|
||||
return ttm_pool_free(pool, ttm);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1623,14 +1665,15 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
|
||||
static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev,
|
||||
uint32_t reserve_size)
|
||||
{
|
||||
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
|
||||
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
|
||||
ctx->c2p_train_data_offset =
|
||||
ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
|
||||
ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
|
||||
ctx->p2c_train_data_offset =
|
||||
(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
|
||||
ctx->train_data_size =
|
||||
|
@ -1648,11 +1691,12 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
|
|||
*/
|
||||
static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
|
||||
{
|
||||
int ret;
|
||||
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
|
||||
bool mem_train_support = false;
|
||||
uint32_t reserve_size = 0;
|
||||
int ret;
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
if (adev->bios && !amdgpu_sriov_vf(adev)) {
|
||||
if (amdgpu_atomfirmware_mem_training_supported(adev))
|
||||
mem_train_support = true;
|
||||
else
|
||||
|
@ -1666,14 +1710,18 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
|
|||
* Otherwise, fallback to legacy approach to check and reserve tmr block for ip
|
||||
* discovery data and G6 memory training data respectively
|
||||
*/
|
||||
adev->mman.discovery_tmr_size =
|
||||
amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
|
||||
if (!adev->mman.discovery_tmr_size)
|
||||
adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
|
||||
if (adev->bios)
|
||||
reserve_size =
|
||||
amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
|
||||
|
||||
if (!adev->bios && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
|
||||
reserve_size = max(reserve_size, (uint32_t)280 << 20);
|
||||
else if (!reserve_size)
|
||||
reserve_size = DISCOVERY_TMR_OFFSET;
|
||||
|
||||
if (mem_train_support) {
|
||||
/* reserve vram for mem train according to TMR location */
|
||||
amdgpu_ttm_training_data_block_init(adev);
|
||||
amdgpu_ttm_training_data_block_init(adev, reserve_size);
|
||||
ret = amdgpu_bo_create_kernel_at(adev,
|
||||
ctx->c2p_train_data_offset,
|
||||
ctx->train_data_size,
|
||||
|
@ -1687,20 +1735,58 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
|
|||
ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
|
||||
}
|
||||
|
||||
ret = amdgpu_bo_create_kernel_at(adev,
|
||||
adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
|
||||
adev->mman.discovery_tmr_size,
|
||||
&adev->mman.discovery_memory,
|
||||
NULL);
|
||||
if (ret) {
|
||||
DRM_ERROR("alloc tmr failed(%d)!\n", ret);
|
||||
amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
|
||||
return ret;
|
||||
if (!adev->gmc.is_app_apu) {
|
||||
ret = amdgpu_bo_create_kernel_at(
|
||||
adev, adev->gmc.real_vram_size - reserve_size,
|
||||
reserve_size, &adev->mman.fw_reserved_memory, NULL);
|
||||
if (ret) {
|
||||
DRM_ERROR("alloc tmr failed(%d)!\n", ret);
|
||||
amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
|
||||
NULL, NULL);
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
|
||||
return 0;
|
||||
|
||||
adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
|
||||
sizeof(*adev->mman.ttm_pools),
|
||||
GFP_KERNEL);
|
||||
if (!adev->mman.ttm_pools)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
|
||||
ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
|
||||
adev->gmc.mem_partitions[i].numa.node,
|
||||
false, false);
|
||||
}
|
||||
return 0;
|
||||
}
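
Sketch of the resulting layout on an assumed two-partition APU (the partition-to-node mapping here is illustrative):

/*
 * mem_partitions[0].numa.node = 0  ->  mman.ttm_pools[0] allocates on node 0
 * mem_partitions[1].numa.node = 1  ->  mman.ttm_pools[1] allocates on node 1
 *
 * amdgpu_ttm_tt_populate()/unpopulate() (shown earlier in this diff) pick the
 * pool through gtt->pool_id and fall back to the global bdev.pool otherwise.
 */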
|
||||
|
||||
static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools)
|
||||
return;
|
||||
|
||||
for (i = 0; i < adev->gmc.num_mem_partitions; i++)
|
||||
ttm_pool_fini(&adev->mman.ttm_pools[i]);
|
||||
|
||||
kfree(adev->mman.ttm_pools);
|
||||
adev->mman.ttm_pools = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
|
||||
* gtt/vram related fields.
|
||||
|
@ -1727,6 +1813,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
|||
DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_ttm_pools_init(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to init ttm pools(%d).\n", r);
|
||||
return r;
|
||||
}
|
||||
adev->mman.initialized = true;
|
||||
|
||||
/* Initialize VRAM pool with all of VRAM divided into pages */
|
||||
|
@ -1744,6 +1836,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
|||
adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
|
||||
adev->gmc.visible_vram_size);
|
||||
|
||||
else if (adev->gmc.is_app_apu)
|
||||
DRM_DEBUG_DRIVER(
|
||||
"No need to ioremap when real vram size is 0\n");
|
||||
else
|
||||
#endif
|
||||
adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
|
||||
|
@ -1755,9 +1850,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
|||
*place on the VRAM, so reserve it early.
|
||||
*/
|
||||
r = amdgpu_ttm_fw_reserve_vram_init(adev);
|
||||
if (r) {
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* The reserved vram for the driver must be pinned to the specified
|
||||
|
@ -1781,48 +1875,45 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
|||
/* allocate memory as required for VGA
|
||||
* This is used for VGA emulation and pre-OS scanout buffers to
|
||||
* avoid display artifacts while transitioning between pre-OS
|
||||
* and driver. */
|
||||
r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
|
||||
&adev->mman.stolen_vga_memory,
|
||||
NULL);
|
||||
if (r)
|
||||
return r;
|
||||
r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
|
||||
adev->mman.stolen_extended_size,
|
||||
&adev->mman.stolen_extended_memory,
|
||||
NULL);
|
||||
if (r)
|
||||
return r;
|
||||
r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
|
||||
adev->mman.stolen_reserved_size,
|
||||
&adev->mman.stolen_reserved_memory,
|
||||
NULL);
|
||||
if (r)
|
||||
return r;
|
||||
* and driver.
|
||||
*/
|
||||
if (!adev->gmc.is_app_apu) {
|
||||
r = amdgpu_bo_create_kernel_at(adev, 0,
|
||||
adev->mman.stolen_vga_size,
|
||||
&adev->mman.stolen_vga_memory,
|
||||
NULL);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
|
||||
adev->mman.stolen_extended_size,
|
||||
&adev->mman.stolen_extended_memory,
|
||||
NULL);
|
||||
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_bo_create_kernel_at(adev,
|
||||
adev->mman.stolen_reserved_offset,
|
||||
adev->mman.stolen_reserved_size,
|
||||
&adev->mman.stolen_reserved_memory,
|
||||
NULL);
|
||||
if (r)
|
||||
return r;
|
||||
} else {
|
||||
DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
|
||||
}
|
||||
|
||||
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
|
||||
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
|
||||
(unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));
|
||||
|
||||
/* Compute GTT size, either based on 1/2 the size of RAM size
|
||||
* or whatever the user passed on module init */
|
||||
if (amdgpu_gtt_size == -1) {
|
||||
struct sysinfo si;
|
||||
|
||||
si_meminfo(&si);
|
||||
/* Certain GL unit tests for large textures can cause problems
|
||||
* with the OOM killer since there is no way to link this memory
|
||||
* to a process. This was originally mitigated (but not necessarily
|
||||
* eliminated) by limiting the GTT size. The problem is this limit
|
||||
* is often too low for many modern games so just make the limit 1/2
|
||||
* of system memory which aligns with TTM. The OOM accounting needs
|
||||
* to be addressed, but we shouldn't prevent common 3D applications
|
||||
* from being usable just to potentially mitigate that corner case.
|
||||
*/
|
||||
gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
|
||||
(u64)si.totalram * si.mem_unit / 2);
|
||||
} else {
|
||||
/* Compute GTT size, either based on TTM limit
|
||||
* or whatever the user passed on module init.
|
||||
*/
|
||||
if (amdgpu_gtt_size == -1)
|
||||
gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
|
||||
else
|
||||
gtt_size = (uint64_t)amdgpu_gtt_size << 20;
|
||||
}
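
For a sense of scale (assumed machine, not from the patch): with 32 GiB of system RAM and TTM's default page limit of half of RAM, the computation works out roughly as:

/*
 * ttm_tt_pages_limit() ~= (32 GiB / 2) / 4 KiB = 4 Mi pages
 * gtt_size = 4 Mi << PAGE_SHIFT = 16 GiB
 * unless amdgpu_gtt_size=<MiB> was passed on module load, in which case
 * gtt_size = (uint64_t)amdgpu_gtt_size << 20.
 */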
|
||||
|
||||
/* Initialize GTT memory pool */
|
||||
r = amdgpu_gtt_mgr_init(adev, gtt_size);
|
||||
|
@ -1831,7 +1922,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
|||
return r;
|
||||
}
|
||||
DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
|
||||
(unsigned)(gtt_size / (1024 * 1024)));
|
||||
(unsigned int)(gtt_size / (1024 * 1024)));
|
||||
|
||||
/* Initialize preemptible memory pool */
|
||||
r = amdgpu_preempt_mgr_init(adev);
|
||||
|
@ -1858,7 +1949,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
|||
DRM_ERROR("Failed initializing oa heap.\n");
|
||||
return r;
|
||||
}
|
||||
|
||||
if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_GTT,
|
||||
&adev->mman.sdma_access_bo, NULL,
|
||||
|
@ -1874,18 +1964,24 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
|||
void amdgpu_ttm_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
int idx;
|
||||
|
||||
if (!adev->mman.initialized)
|
||||
return;
|
||||
|
||||
amdgpu_ttm_pools_fini(adev);
|
||||
|
||||
amdgpu_ttm_training_reserve_vram_fini(adev);
|
||||
/* return the stolen vga memory back to VRAM */
|
||||
amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
|
||||
/* return the IP Discovery TMR memory back to VRAM */
|
||||
amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
|
||||
if (adev->mman.stolen_reserved_size)
|
||||
amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
|
||||
NULL, NULL);
|
||||
if (!adev->gmc.is_app_apu) {
|
||||
amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
|
||||
/* return the FW reserved memory back to VRAM */
|
||||
amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
|
||||
NULL);
|
||||
if (adev->mman.stolen_reserved_size)
|
||||
amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
|
||||
NULL, NULL);
|
||||
}
|
||||
amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
|
||||
&adev->mman.sdma_access_ptr);
|
||||
amdgpu_ttm_fw_reserve_vram_fini(adev);
|
||||
|
@ -1927,7 +2023,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
|
|||
int r;
|
||||
|
||||
if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
|
||||
adev->mman.buffer_funcs_enabled == enable)
|
||||
adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
|
||||
return;
|
||||
|
||||
if (enable) {
|
||||
|
@ -1944,8 +2040,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
|
|||
r);
|
||||
return;
|
||||
}
|
||||
|
||||
r = drm_sched_entity_init(&adev->mman.delayed,
|
||||
DRM_SCHED_PRIORITY_NORMAL, &sched,
|
||||
1, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
|
||||
r);
|
||||
goto error_free_entity;
|
||||
}
|
||||
} else {
|
||||
drm_sched_entity_destroy(&adev->mman.entity);
|
||||
drm_sched_entity_destroy(&adev->mman.delayed);
|
||||
dma_fence_put(man->move);
|
||||
man->move = NULL;
|
||||
}
|
||||
|
@ -1957,6 +2063,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
|
|||
size = adev->gmc.visible_vram_size;
|
||||
man->size = size;
|
||||
adev->mman.buffer_funcs_enabled = enable;
|
||||
|
||||
return;
|
||||
|
||||
error_free_entity:
|
||||
drm_sched_entity_destroy(&adev->mman.entity);
|
||||
}
|
||||
|
||||
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
|
||||
|
@ -1964,14 +2075,16 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
|
|||
unsigned int num_dw,
|
||||
struct dma_resv *resv,
|
||||
bool vm_needs_flush,
|
||||
struct amdgpu_job **job)
|
||||
struct amdgpu_job **job,
|
||||
bool delayed)
|
||||
{
|
||||
enum amdgpu_ib_pool_type pool = direct_submit ?
|
||||
AMDGPU_IB_POOL_DIRECT :
|
||||
AMDGPU_IB_POOL_DELAYED;
|
||||
int r;
|
||||
|
||||
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
|
||||
struct drm_sched_entity *entity = delayed ? &adev->mman.delayed :
|
||||
&adev->mman.entity;
|
||||
r = amdgpu_job_alloc_with_ib(adev, entity,
|
||||
AMDGPU_FENCE_OWNER_UNDEFINED,
|
||||
num_dw * 4, pool, job);
|
||||
if (r)
|
||||
|
@ -1997,10 +2110,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
|
|||
bool vm_needs_flush, bool tmz)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
unsigned num_loops, num_dw;
|
||||
unsigned int num_loops, num_dw;
|
||||
struct amdgpu_job *job;
|
||||
uint32_t max_bytes;
|
||||
unsigned i;
|
||||
unsigned int i;
|
||||
int r;
|
||||
|
||||
if (!direct_submit && !ring->sched.ready) {
|
||||
|
@ -2012,7 +2125,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
|
|||
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
|
||||
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
|
||||
r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
|
||||
resv, vm_needs_flush, &job);
|
||||
resv, vm_needs_flush, &job, false);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
@ -2048,7 +2161,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
|
|||
uint64_t dst_addr, uint32_t byte_count,
|
||||
struct dma_resv *resv,
|
||||
struct dma_fence **fence,
|
||||
bool vm_needs_flush)
|
||||
bool vm_needs_flush, bool delayed)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
unsigned int num_loops, num_dw;
|
||||
|
@ -2061,7 +2174,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
|
|||
num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
|
||||
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
|
||||
r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
|
||||
&job);
|
||||
&job, delayed);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
@ -2084,7 +2197,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
|
|||
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
|
||||
uint32_t src_data,
|
||||
struct dma_resv *resv,
|
||||
struct dma_fence **f)
|
||||
struct dma_fence **f,
|
||||
bool delayed)
|
||||
{
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
|
||||
|
@ -2113,7 +2227,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
|
|||
goto error;
|
||||
|
||||
r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
|
||||
&next, true);
|
||||
&next, true, delayed);
|
||||
if (r)
|
||||
goto error;
|
||||
|
||||
|
@ -2164,7 +2278,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
|
|||
|
||||
static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
|
||||
struct amdgpu_device *adev = m->private;
|
||||
|
||||
return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
|
||||
}
|
||||
|
|
|
@ -49,6 +49,7 @@ struct amdgpu_gtt_mgr {
|
|||
|
||||
struct amdgpu_mman {
|
||||
struct ttm_device bdev;
|
||||
struct ttm_pool *ttm_pools;
|
||||
bool initialized;
|
||||
void __iomem *aper_base_kaddr;
|
||||
|
||||
|
@ -60,6 +61,8 @@ struct amdgpu_mman {
|
|||
struct mutex gtt_window_lock;
|
||||
/* Scheduler entity for buffer moves */
|
||||
struct drm_sched_entity entity;
|
||||
/* Scheduler entity for VRAM clearing */
|
||||
struct drm_sched_entity delayed;
|
||||
|
||||
struct amdgpu_vram_mgr vram_mgr;
|
||||
struct amdgpu_gtt_mgr gtt_mgr;
|
||||
|
@ -78,7 +81,8 @@ struct amdgpu_mman {
|
|||
/* discovery */
|
||||
uint8_t *discovery_bin;
|
||||
uint32_t discovery_tmr_size;
|
||||
struct amdgpu_bo *discovery_memory;
|
||||
/* fw reserved memory */
|
||||
struct amdgpu_bo *fw_reserved_memory;
|
||||
|
||||
/* firmware VRAM reservation */
|
||||
u64 fw_vram_usage_start_offset;
|
||||
|
@ -150,7 +154,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
|
|||
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
|
||||
uint32_t src_data,
|
||||
struct dma_resv *resv,
|
||||
struct dma_fence **fence);
|
||||
struct dma_fence **fence,
|
||||
bool delayed);
|
||||
|
||||
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
|
||||
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
|
||||
|
|
|
@ -748,7 +748,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
|
|||
const struct imu_firmware_header_v1_0 *imu_hdr = NULL;
|
||||
u8 *ucode_addr;
|
||||
|
||||
if (NULL == ucode->fw)
|
||||
if (!ucode->fw)
|
||||
return 0;
|
||||
|
||||
ucode->mc_addr = mc_addr;
|
||||
|
@ -972,7 +972,7 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
|
|||
uint8_t *src_addr = NULL;
|
||||
uint8_t *dst_addr = NULL;
|
||||
|
||||
if (NULL == ucode->fw)
|
||||
if (!ucode->fw)
|
||||
return 0;
|
||||
|
||||
comm_hdr = (const struct common_firmware_header *)ucode->fw->data;
|
||||
|
@ -1043,6 +1043,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
|
|||
if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
|
||||
adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
|
||||
const struct gfx_firmware_header_v1_0 *cp_hdr;
|
||||
|
||||
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
|
||||
amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset,
|
||||
adev->firmware.fw_buf_ptr + fw_offset);
|
||||
|
|
|
@ -59,6 +59,8 @@ struct amdgpu_umc_ras {
|
|||
void *ras_error_status);
|
||||
void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
|
||||
void *ras_error_status);
|
||||
/* support different eeprom table version for different asic */
|
||||
void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr);
|
||||
};
|
||||
|
||||
struct amdgpu_umc_funcs {
|
||||
|
|
|
@ -35,17 +35,51 @@ struct amdgpu_debugfs_regs2_iocdata {
|
|||
} srbm;
|
||||
};
|
||||
|
||||
struct amdgpu_debugfs_regs2_iocdata_v2 {
|
||||
__u32 use_srbm, use_grbm, pg_lock;
|
||||
struct {
|
||||
__u32 se, sh, instance;
|
||||
} grbm;
|
||||
struct {
|
||||
__u32 me, pipe, queue, vmid;
|
||||
} srbm;
|
||||
u32 xcc_id;
|
||||
};
|
||||
|
||||
struct amdgpu_debugfs_gprwave_iocdata {
|
||||
u32 gpr_or_wave, se, sh, cu, wave, simd, xcc_id;
|
||||
struct {
|
||||
u32 thread, vpgr_or_sgpr;
|
||||
} gpr;
|
||||
};
|
||||
|
||||
/*
|
||||
* MMIO debugfs state data (per file* handle)
|
||||
*/
|
||||
struct amdgpu_debugfs_regs2_data {
|
||||
struct amdgpu_device *adev;
|
||||
struct mutex lock;
|
||||
struct amdgpu_debugfs_regs2_iocdata id;
|
||||
struct amdgpu_debugfs_regs2_iocdata_v2 id;
|
||||
};
|
||||
|
||||
struct amdgpu_debugfs_gprwave_data {
|
||||
struct amdgpu_device *adev;
|
||||
struct mutex lock;
|
||||
struct amdgpu_debugfs_gprwave_iocdata id;
|
||||
};
|
||||
|
||||
enum AMDGPU_DEBUGFS_REGS2_CMDS {
|
||||
AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0,
|
||||
AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2,
|
||||
};
|
||||
|
||||
enum AMDGPU_DEBUGFS_GPRWAVE_CMDS {
|
||||
AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE=0,
|
||||
};
|
||||
|
||||
//reg2 interface
|
||||
#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata)
|
||||
#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2 _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, struct amdgpu_debugfs_regs2_iocdata_v2)
|
||||
|
||||
//gprwave interface
|
||||
#define AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE, struct amdgpu_debugfs_gprwave_iocdata)
|
||||
|
|
|
@ -96,16 +96,16 @@
|
|||
*/
|
||||
struct amdgpu_uvd_cs_ctx {
|
||||
struct amdgpu_cs_parser *parser;
|
||||
unsigned reg, count;
|
||||
unsigned data0, data1;
|
||||
unsigned idx;
|
||||
unsigned int reg, count;
|
||||
unsigned int data0, data1;
|
||||
unsigned int idx;
|
||||
struct amdgpu_ib *ib;
|
||||
|
||||
/* does the IB have a msg command */
|
||||
bool has_msg_cmd;
|
||||
|
||||
/* minimum buffer sizes */
|
||||
unsigned *buf_sizes;
|
||||
unsigned int *buf_sizes;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DRM_AMDGPU_SI
|
||||
|
@ -186,7 +186,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
|
|||
unsigned long bo_size;
|
||||
const char *fw_name;
|
||||
const struct common_firmware_header *hdr;
|
||||
unsigned family_id;
|
||||
unsigned int family_id;
|
||||
int i, j, r;
|
||||
|
||||
INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
|
||||
|
@ -275,7 +275,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
|
|||
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
|
||||
|
||||
if (adev->asic_type < CHIP_VEGA20) {
|
||||
unsigned version_major, version_minor;
|
||||
unsigned int version_major, version_minor;
|
||||
|
||||
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
|
||||
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
|
||||
|
@ -420,7 +420,7 @@ int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
|
|||
|
||||
int amdgpu_uvd_suspend(struct amdgpu_device *adev)
|
||||
{
|
||||
unsigned size;
|
||||
unsigned int size;
|
||||
void *ptr;
|
||||
int i, j, idx;
|
||||
bool in_ras_intr = amdgpu_ras_intr_triggered();
|
||||
|
@ -469,7 +469,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
|
|||
|
||||
int amdgpu_uvd_resume(struct amdgpu_device *adev)
|
||||
{
|
||||
unsigned size;
|
||||
unsigned int size;
|
||||
void *ptr;
|
||||
int i, idx;
|
||||
|
||||
|
@ -491,7 +491,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
|
|||
adev->uvd.inst[i].saved_bo = NULL;
|
||||
} else {
|
||||
const struct common_firmware_header *hdr;
|
||||
unsigned offset;
|
||||
unsigned int offset;
|
||||
|
||||
hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
|
||||
|
@ -542,6 +542,7 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
|
|||
static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < abo->placement.num_placement; ++i) {
|
||||
abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
|
||||
abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
|
||||
|
@ -579,7 +580,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
|
|||
|
||||
r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
|
||||
if (r) {
|
||||
DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
|
||||
DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -589,6 +590,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
|
|||
if (cmd == 0x0 || cmd == 0x3) {
|
||||
/* yes, force it into VRAM */
|
||||
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
|
||||
|
||||
amdgpu_bo_placement_from_domain(bo, domain);
|
||||
}
|
||||
amdgpu_uvd_force_into_uvd_segment(bo);
|
||||
|
@ -609,21 +611,21 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
|
|||
* Peek into the decode message and calculate the necessary buffer sizes.
|
||||
*/
|
||||
static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
|
||||
unsigned buf_sizes[])
|
||||
unsigned int buf_sizes[])
|
||||
{
|
||||
unsigned stream_type = msg[4];
|
||||
unsigned width = msg[6];
|
||||
unsigned height = msg[7];
|
||||
unsigned dpb_size = msg[9];
|
||||
unsigned pitch = msg[28];
|
||||
unsigned level = msg[57];
|
||||
unsigned int stream_type = msg[4];
|
||||
unsigned int width = msg[6];
|
||||
unsigned int height = msg[7];
|
||||
unsigned int dpb_size = msg[9];
|
||||
unsigned int pitch = msg[28];
|
||||
unsigned int level = msg[57];
|
||||
|
||||
unsigned width_in_mb = width / 16;
|
||||
unsigned height_in_mb = ALIGN(height / 16, 2);
|
||||
unsigned fs_in_mb = width_in_mb * height_in_mb;
|
||||
unsigned int width_in_mb = width / 16;
|
||||
unsigned int height_in_mb = ALIGN(height / 16, 2);
|
||||
unsigned int fs_in_mb = width_in_mb * height_in_mb;
|
||||
|
||||
unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
|
||||
unsigned min_ctx_size = ~0;
|
||||
unsigned int image_size, tmp, min_dpb_size, num_dpb_buffer;
|
||||
unsigned int min_ctx_size = ~0;
|
||||
|
||||
image_size = width * height;
|
||||
image_size += image_size / 2;
|
||||
|
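
A worked example of the macroblock math above (illustrative stream, not from the patch):

/*
 * 1920x1088 H.264: width_in_mb = 1920 / 16 = 120,
 * height_in_mb = ALIGN(1088 / 16, 2) = 68, fs_in_mb = 120 * 68 = 8160,
 * image_size = 1920 * 1088 * 3 / 2 = 3133440 bytes (luma + half-size chroma);
 * the per-level DPB counts in the switch below are then divided by fs_in_mb.
 */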
@ -631,7 +633,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
|
|||
|
||||
switch (stream_type) {
|
||||
case 0: /* H264 */
|
||||
switch(level) {
|
||||
switch (level) {
|
||||
case 30:
|
||||
num_dpb_buffer = 8100 / fs_in_mb;
|
||||
break;
|
||||
|
@ -709,7 +711,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
|
|||
break;
|
||||
|
||||
case 7: /* H264 Perf */
|
||||
switch(level) {
|
||||
switch (level) {
|
||||
case 30:
|
||||
num_dpb_buffer = 8100 / fs_in_mb;
|
||||
break;
|
||||
|
@ -742,7 +744,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
|
|||
/* reference picture buffer */
|
||||
min_dpb_size = image_size * num_dpb_buffer;
|
||||
|
||||
if (!adev->uvd.use_ctx_buf){
|
||||
if (!adev->uvd.use_ctx_buf) {
|
||||
/* macroblock context buffer */
|
||||
min_dpb_size +=
|
||||
width_in_mb * height_in_mb * num_dpb_buffer * 192;
|
||||
|
@ -805,7 +807,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
|
|||
* Make sure that we don't open up too many sessions.
|
||||
*/
|
||||
static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
|
||||
struct amdgpu_bo *bo, unsigned offset)
|
||||
struct amdgpu_bo *bo, unsigned int offset)
|
||||
{
|
||||
struct amdgpu_device *adev = ctx->parser->adev;
|
||||
int32_t *msg, msg_type, handle;
|
||||
|
@ -911,7 +913,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
|
|||
|
||||
r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
|
||||
if (r) {
|
||||
DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
|
||||
DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -930,7 +932,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
|
|||
if (cmd < 0x4) {
|
||||
if ((end - start) < ctx->buf_sizes[cmd]) {
|
||||
DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
|
||||
(unsigned)(end - start),
|
||||
(unsigned int)(end - start),
|
||||
ctx->buf_sizes[cmd]);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -938,7 +940,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
|
|||
} else if (cmd == 0x206) {
|
||||
if ((end - start) < ctx->buf_sizes[4]) {
|
||||
DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
|
||||
(unsigned)(end - start),
|
||||
(unsigned int)(end - start),
|
||||
ctx->buf_sizes[4]);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -949,14 +951,14 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
|
|||
|
||||
if (!ctx->parser->adev->uvd.address_64_bit) {
|
||||
if ((start >> 28) != ((end - 1) >> 28)) {
|
||||
DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
|
||||
DRM_ERROR("reloc %llx-%llx crossing 256MB boundary!\n",
|
||||
start, end);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((cmd == 0 || cmd == 0x3) &&
|
||||
(start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
|
||||
DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
|
||||
DRM_ERROR("msg/fb buffer %llx-%llx out of 256MB segment!\n",
|
||||
start, end);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -990,7 +992,7 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,

ctx->idx++;
for (i = 0; i <= ctx->count; ++i) {
unsigned reg = ctx->reg + i;
unsigned int reg = ctx->reg + i;

if (ctx->idx >= ctx->ib->length_dw) {
DRM_ERROR("Register command after end of CS!\n");

@ -1036,7 +1038,8 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,

for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) {
uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx);
unsigned type = CP_PACKET_GET_TYPE(cmd);
unsigned int type = CP_PACKET_GET_TYPE(cmd);

switch (type) {
case PACKET_TYPE0:
ctx->reg = CP_PACKET0_GET_REG(cmd);

@ -1070,7 +1073,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_ib *ib)
{
struct amdgpu_uvd_cs_ctx ctx = {};
unsigned buf_sizes[] = {
unsigned int buf_sizes[] = {
[0x00000000] = 2048,
[0x00000001] = 0xFFFFFFFF,
[0x00000002] = 0xFFFFFFFF,

@ -1185,8 +1188,9 @@ err_free:
}

/* multiple fence commands without any stream commands in between can
crash the vcpu so just try to emmit a dummy create/destroy msg to
avoid this */
* crash the vcpu so just try to emmit a dummy create/destroy msg to
* avoid this
*/
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence)
{

@ -1252,15 +1256,14 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, uvd.idle_work.work);
unsigned fences = 0, i, j;
unsigned int fences = 0, i, j;

for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
if (adev->uvd.harvest_config & (1 << i))
continue;
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
for (j = 0; j < adev->uvd.num_enc_rings; ++j)
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
}
}

if (fences == 0) {

@ -1356,7 +1359,7 @@ error:
*/
uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
{
unsigned i;
unsigned int i;
uint32_t used_handles = 0;

for (i = 0; i < adev->uvd.max_handles; ++i) {
@ -99,7 +99,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
|
|||
{
|
||||
const char *fw_name;
|
||||
const struct common_firmware_header *hdr;
|
||||
unsigned ucode_version, version_major, version_minor, binary_id;
|
||||
unsigned int ucode_version, version_major, version_minor, binary_id;
|
||||
int i, r;
|
||||
|
||||
switch (adev->asic_type) {
|
||||
|
@ -207,7 +207,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
|
|||
*/
|
||||
int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned int i;
|
||||
|
||||
if (adev->vce.vcpu_bo == NULL)
|
||||
return 0;
|
||||
|
@ -286,7 +286,7 @@ int amdgpu_vce_resume(struct amdgpu_device *adev)
|
|||
{
|
||||
void *cpu_addr;
|
||||
const struct common_firmware_header *hdr;
|
||||
unsigned offset;
|
||||
unsigned int offset;
|
||||
int r, idx;
|
||||
|
||||
if (adev->vce.vcpu_bo == NULL)
|
||||
|
@ -332,7 +332,7 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
|
|||
{
|
||||
struct amdgpu_device *adev =
|
||||
container_of(work, struct amdgpu_device, vce.idle_work.work);
|
||||
unsigned i, count = 0;
|
||||
unsigned int i, count = 0;
|
||||
|
||||
for (i = 0; i < adev->vce.num_rings; i++)
|
||||
count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
|
||||
|
@ -409,6 +409,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
|
|||
{
|
||||
struct amdgpu_ring *ring = &adev->vce.ring[0];
|
||||
int i, r;
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
|
||||
uint32_t handle = atomic_read(&adev->vce.handles[i]);
|
||||
|
||||
|
@ -436,7 +437,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
|
|||
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
|
||||
struct dma_fence **fence)
|
||||
{
|
||||
const unsigned ib_size_dw = 1024;
|
||||
const unsigned int ib_size_dw = 1024;
|
||||
struct amdgpu_job *job;
|
||||
struct amdgpu_ib *ib;
|
||||
struct amdgpu_ib ib_msg;
|
||||
|
@ -528,7 +529,7 @@ err:
|
|||
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
|
||||
bool direct, struct dma_fence **fence)
|
||||
{
|
||||
const unsigned ib_size_dw = 1024;
|
||||
const unsigned int ib_size_dw = 1024;
|
||||
struct amdgpu_job *job;
|
||||
struct amdgpu_ib *ib;
|
||||
struct dma_fence *f = NULL;
|
||||
|
@ -596,12 +597,12 @@ err:
|
|||
*/
|
||||
static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
|
||||
struct amdgpu_ib *ib, int lo, int hi,
|
||||
unsigned size, int32_t index)
|
||||
unsigned int size, int32_t index)
|
||||
{
|
||||
int64_t offset = ((uint64_t)size) * ((int64_t)index);
|
||||
struct ttm_operation_ctx ctx = { false, false };
|
||||
struct amdgpu_bo_va_mapping *mapping;
|
||||
unsigned i, fpfn, lpfn;
|
||||
unsigned int i, fpfn, lpfn;
|
||||
struct amdgpu_bo *bo;
|
||||
uint64_t addr;
|
||||
int r;
|
||||
|
@ -619,7 +620,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
|
|||
|
||||
r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
|
||||
if (r) {
|
||||
DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
|
||||
DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
|
||||
addr, lo, hi, size, index);
|
||||
return r;
|
||||
}
|
||||
|
@ -646,7 +647,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
|
|||
* Patch relocation inside command stream with real buffer address
|
||||
*/
|
||||
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
|
||||
int lo, int hi, unsigned size, uint32_t index)
|
||||
int lo, int hi, unsigned int size, uint32_t index)
|
||||
{
|
||||
struct amdgpu_bo_va_mapping *mapping;
|
||||
struct amdgpu_bo *bo;
|
||||
|
@ -662,14 +663,14 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
|
|||
|
||||
r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
|
||||
if (r) {
|
||||
DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
|
||||
DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
|
||||
addr, lo, hi, size, index);
|
||||
return r;
|
||||
}
|
||||
|
||||
if ((addr + (uint64_t)size) >
|
||||
(mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
|
||||
DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
|
||||
DRM_ERROR("BO too small for addr 0x%010llx %d %d\n",
|
||||
addr, lo, hi);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -692,12 +693,12 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
|
|||
* @allocated: allocated a new handle?
|
||||
*
|
||||
* Validates the handle and return the found session index or -EINVAL
|
||||
* we we don't have another free session index.
|
||||
* we don't have another free session index.
|
||||
*/
|
||||
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
|
||||
uint32_t handle, uint32_t *allocated)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned int i;
|
||||
|
||||
/* validate the handle */
|
||||
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
|
||||
|
@ -735,14 +736,14 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
|
|||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib)
|
||||
{
|
||||
unsigned fb_idx = 0, bs_idx = 0;
|
||||
unsigned int fb_idx = 0, bs_idx = 0;
|
||||
int session_idx = -1;
|
||||
uint32_t destroyed = 0;
|
||||
uint32_t created = 0;
|
||||
uint32_t allocated = 0;
|
||||
uint32_t tmp, handle = 0;
|
||||
uint32_t *size = &tmp;
|
||||
unsigned idx;
|
||||
unsigned int idx;
|
||||
int i, r = 0;
|
||||
|
||||
job->vm = NULL;
|
||||
|
@ -1084,7 +1085,7 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
|
|||
*
|
||||
*/
|
||||
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
|
||||
unsigned flags)
|
||||
unsigned int flags)
|
||||
{
|
||||
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
|
||||
|
||||
|
@ -1106,7 +1107,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
|
|||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t rptr;
|
||||
unsigned i;
|
||||
unsigned int i;
|
||||
int r, timeout = adev->usec_timeout;
|
||||
|
||||
/* skip ring test for sriov*/
|
||||
|
@ -1171,7 +1172,7 @@ error:
|
|||
|
||||
enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring)
|
||||
{
|
||||
switch(ring) {
|
||||
switch (ring) {
|
||||
case 0:
|
||||
return AMDGPU_RING_PRIO_0;
|
||||
case 1:
|
||||
|
|
|
@ -56,6 +56,7 @@
|
|||
#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
|
||||
#define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin"
|
||||
#define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin"
|
||||
#define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin"
|
||||
#define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin"
|
||||
|
||||
MODULE_FIRMWARE(FIRMWARE_RAVEN);
|
||||
|
@ -77,6 +78,7 @@ MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
|
|||
MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);
|
||||
MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
|
||||
MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
|
||||
MODULE_FIRMWARE(FIRMWARE_VCN4_0_3);
|
||||
MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
|
||||
|
||||
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
|
||||
|
@ -167,7 +169,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
|
|||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
|
||||
|
||||
if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)){
|
||||
if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) {
|
||||
fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
|
||||
log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
|
||||
} else {
|
||||
|
@ -233,11 +235,11 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
|
|||
if (adev->vcn.harvest_config & (1 << j))
|
||||
continue;
|
||||
|
||||
if (adev->vcn.indirect_sram) {
|
||||
amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
|
||||
&adev->vcn.inst[j].dpg_sram_gpu_addr,
|
||||
(void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
|
||||
}
|
||||
amdgpu_bo_free_kernel(
|
||||
&adev->vcn.inst[j].dpg_sram_bo,
|
||||
&adev->vcn.inst[j].dpg_sram_gpu_addr,
|
||||
(void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
|
||||
|
||||
kvfree(adev->vcn.inst[j].saved_bo);
|
||||
|
||||
amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
|
||||
|
@ -274,20 +276,19 @@ bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type t
|
|||
bool ret = false;
|
||||
int vcn_config = adev->vcn.vcn_config[vcn_instance];
|
||||
|
||||
if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) {
|
||||
if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK))
|
||||
ret = true;
|
||||
} else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK)) {
|
||||
else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK))
|
||||
ret = true;
|
||||
} else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK)) {
|
||||
else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK))
|
||||
ret = true;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_vcn_suspend(struct amdgpu_device *adev)
|
||||
{
|
||||
unsigned size;
|
||||
unsigned int size;
|
||||
void *ptr;
|
||||
int i, idx;
|
||||
|
||||
|
@ -316,7 +317,7 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
|
|||
|
||||
int amdgpu_vcn_resume(struct amdgpu_device *adev)
|
||||
{
|
||||
unsigned size;
|
||||
unsigned int size;
|
||||
void *ptr;
|
||||
int i, idx;
|
||||
|
||||
|
@ -338,7 +339,7 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
|
|||
adev->vcn.inst[i].saved_bo = NULL;
|
||||
} else {
|
||||
const struct common_firmware_header *hdr;
|
||||
unsigned offset;
|
||||
unsigned int offset;
|
||||
|
||||
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
|
||||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
|
||||
|
@ -369,9 +370,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
|
|||
if (adev->vcn.harvest_config & (1 << j))
|
||||
continue;
|
||||
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; ++i)
|
||||
fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
|
||||
}
|
||||
|
||||
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
|
||||
struct dpg_pause_state new_state;
|
||||
|
@ -458,7 +458,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
|
|||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t tmp = 0;
|
||||
unsigned i;
|
||||
unsigned int i;
|
||||
int r;
|
||||
|
||||
/* VCN in SRIOV does not support direct register read/write */
|
||||
|
@ -795,7 +795,7 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
|
|||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t rptr;
|
||||
unsigned i;
|
||||
unsigned int i;
|
||||
int r;
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
|
@ -993,11 +993,14 @@ error:
|
|||
|
||||
int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
long r;
|
||||
|
||||
r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
|
||||
if (r)
|
||||
goto error;
|
||||
if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(4, 0, 3)) {
|
||||
r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout);
|
||||
|
||||
|
@ -1007,7 +1010,7 @@ error:
|
|||
|
||||
enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
|
||||
{
|
||||
switch(ring) {
|
||||
switch (ring) {
|
||||
case 0:
|
||||
return AMDGPU_RING_PRIO_0;
|
||||
case 1:
|
||||
|
@ -1026,6 +1029,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
|
|||
|
||||
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
|
||||
const struct common_firmware_header *hdr;
|
||||
|
||||
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
|
||||
|
||||
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
|
||||
|
@ -1041,6 +1045,9 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
|
|||
adev->firmware.ucode[idx].fw = adev->vcn.fw;
|
||||
adev->firmware.fw_size +=
|
||||
ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
|
||||
|
||||
if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(4, 0, 3))
|
||||
break;
|
||||
}
|
||||
dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
|
||||
}
|
||||
|
@ -1051,7 +1058,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
|
|||
*/
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
|
||||
size_t size, loff_t *pos)
|
||||
size_t size, loff_t *pos)
|
||||
{
|
||||
struct amdgpu_vcn_inst *vcn;
|
||||
void *log_buf;
|
||||
|
@ -1097,7 +1104,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
|
|||
if (read_pos == AMDGPU_VCNFW_LOG_SIZE)
|
||||
read_pos = plog->header_size;
|
||||
if (read_num[i] == copy_to_user((buf + read_bytes),
|
||||
(log_buf + read_pos), read_num[i]))
|
||||
(log_buf + read_pos), read_num[i]))
|
||||
return -EFAULT;
|
||||
|
||||
read_bytes += read_num[i];
|
||||
|
@ -1118,7 +1125,7 @@ static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = {
|
|||
#endif
|
||||
|
||||
void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
|
||||
struct amdgpu_vcn_inst *vcn)
|
||||
struct amdgpu_vcn_inst *vcn)
|
||||
{
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
struct drm_minor *minor = adev_to_drm(adev)->primary;
|
||||
|
@ -1126,7 +1133,7 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
|
|||
char name[32];
|
||||
|
||||
sprintf(name, "amdgpu_vcn_%d_fwlog", i);
|
||||
debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, vcn,
|
||||
debugfs_create_file_size(name, S_IFREG | 0444, root, vcn,
|
||||
&amdgpu_debugfs_vcnfwlog_fops,
|
||||
AMDGPU_VCNFW_LOG_SIZE);
|
||||
#endif
|
||||
|
@ -1140,7 +1147,7 @@ void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
|
|||
uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size;
|
||||
volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
|
||||
volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
|
||||
+ vcn->fw_shared.log_offset;
|
||||
+ vcn->fw_shared.log_offset;
|
||||
*flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG);
|
||||
fw_log->is_enabled = 1;
|
||||
fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF);
|
||||
|
@ -1181,6 +1188,31 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
|
||||
{
|
||||
int r, i;
|
||||
|
||||
r = amdgpu_ras_block_late_init(adev, ras_block);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (amdgpu_ras_is_supported(adev, ras_block->block)) {
|
||||
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
|
||||
if (adev->vcn.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
r = amdgpu_irq_get(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
|
||||
if (r)
|
||||
goto late_fini;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
late_fini:
|
||||
amdgpu_ras_block_late_fini(adev, ras_block);
|
||||
return r;
|
||||
}
|
||||
|
||||
int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int err;
|
||||
|
@ -1202,7 +1234,7 @@ int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev)
|
|||
adev->vcn.ras_if = &ras->ras_block.ras_comm;
|
||||
|
||||
if (!ras->ras_block.ras_late_init)
|
||||
ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
|
||||
ras->ras_block.ras_late_init = amdgpu_vcn_ras_late_init;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@
|
|||
#define AMDGPU_VCN_FIRMWARE_OFFSET 256
|
||||
#define AMDGPU_VCN_MAX_ENC_RINGS 3
|
||||
|
||||
#define AMDGPU_MAX_VCN_INSTANCES 2
|
||||
#define AMDGPU_MAX_VCN_INSTANCES 4
|
||||
#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES
|
||||
|
||||
#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
|
||||
|
@ -141,18 +141,23 @@
RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \
})

#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
do { \
if (!indirect) { \
WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \
WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
(0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
} else { \
*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \
*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \
} \
#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
do { \
if (!indirect) { \
WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \
mmUVD_DPG_LMA_DATA, value); \
WREG32_SOC15( \
VCN, GET_INST(VCN, inst_idx), \
mmUVD_DPG_LMA_CTL, \
(0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
} else { \
*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
offset; \
*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
value; \
} \
} while (0)

#define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2)
@ -234,6 +239,7 @@ struct amdgpu_vcn_inst {
|
|||
struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
|
||||
atomic_t sched_score;
|
||||
struct amdgpu_irq_src irq;
|
||||
struct amdgpu_irq_src ras_poison_irq;
|
||||
struct amdgpu_vcn_reg external;
|
||||
struct amdgpu_bo *dpg_sram_bo;
|
||||
struct dpg_pause_state pause_state;
|
||||
|
@ -242,6 +248,7 @@ struct amdgpu_vcn_inst {
|
|||
uint32_t *dpg_sram_curr_addr;
|
||||
atomic_t dpg_enc_submission_cnt;
|
||||
struct amdgpu_vcn_fw_shared fw_shared;
|
||||
uint8_t aid_id;
|
||||
};
|
||||
|
||||
struct amdgpu_vcn_ras {
|
||||
|
@ -271,6 +278,9 @@ struct amdgpu_vcn {
|
|||
|
||||
struct ras_common_if *ras_if;
|
||||
struct amdgpu_vcn_ras *ras;
|
||||
|
||||
uint16_t inst_mask;
|
||||
uint8_t num_inst_per_aid;
|
||||
};
|
||||
|
||||
struct amdgpu_fw_shared_rb_ptrs_struct {
|
||||
|
@ -400,6 +410,8 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
|
|||
int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev,
|
||||
struct ras_common_if *ras_block);
|
||||
int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -56,7 +56,8 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
|
|||
|
||||
/* enable virtual display */
|
||||
if (adev->asic_type != CHIP_ALDEBARAN &&
|
||||
adev->asic_type != CHIP_ARCTURUS) {
|
||||
adev->asic_type != CHIP_ARCTURUS &&
|
||||
((adev->pdev->class >> 8) != PCI_CLASS_ACCELERATOR_PROCESSING)) {
|
||||
if (adev->mode_info.num_crtc == 0)
|
||||
adev->mode_info.num_crtc = 1;
|
||||
adev->enable_virtual_display = true;
|
||||
|
@ -65,16 +66,19 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
|
|||
adev->cg_flags = 0;
|
||||
adev->pg_flags = 0;
|
||||
|
||||
/* enable mcbp for sriov asic_type before soc21 */
|
||||
amdgpu_mcbp = (adev->asic_type < CHIP_IP_DISCOVERY) ? 1 : 0;
|
||||
/* enable mcbp for sriov */
|
||||
amdgpu_mcbp = 1;
|
||||
|
||||
/* Reduce kcq number to 2 to reduce latency */
|
||||
if (amdgpu_num_kcq == -1)
|
||||
amdgpu_num_kcq = 2;
|
||||
}
|
||||
|
||||
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
|
||||
uint32_t reg0, uint32_t reg1,
|
||||
uint32_t ref, uint32_t mask)
|
||||
{
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
|
||||
struct amdgpu_ring *ring = &kiq->ring;
|
||||
signed long r, cnt = 0;
|
||||
unsigned long flags;
|
||||
|
@ -557,7 +561,6 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
|
|||
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version);
|
||||
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version);
|
||||
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version);
|
||||
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_IMU, adev->gfx.imu_fw_version);
|
||||
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version);
|
||||
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD,
|
||||
adev->psp.asd_context.bin_desc.fw_version);
|
||||
|
|
|
@ -1358,6 +1358,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
|
|||
amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
|
||||
|
||||
bo_va->ref_count = 1;
|
||||
bo_va->last_pt_update = dma_fence_get_stub();
|
||||
INIT_LIST_HEAD(&bo_va->valids);
|
||||
INIT_LIST_HEAD(&bo_va->invalids);
|
||||
|
||||
|
@ -1433,14 +1434,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
|
|||
uint64_t eaddr;
|
||||
|
||||
/* validate the parameters */
|
||||
if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
|
||||
size == 0 || size & ~PAGE_MASK)
|
||||
if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
|
||||
return -EINVAL;
|
||||
if (saddr + size <= saddr || offset + size <= offset)
|
||||
return -EINVAL;
|
||||
|
||||
/* make sure object fit at this offset */
|
||||
eaddr = saddr + size - 1;
|
||||
if (saddr >= eaddr ||
|
||||
(bo && offset + size > amdgpu_bo_size(bo)) ||
|
||||
if ((bo && offset + size > amdgpu_bo_size(bo)) ||
|
||||
(eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -1499,14 +1500,14 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
|
|||
int r;
|
||||
|
||||
/* validate the parameters */
|
||||
if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
|
||||
size == 0 || size & ~PAGE_MASK)
|
||||
if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
|
||||
return -EINVAL;
|
||||
if (saddr + size <= saddr || offset + size <= offset)
|
||||
return -EINVAL;
|
||||
|
||||
/* make sure object fit at this offset */
|
||||
eaddr = saddr + size - 1;
|
||||
if (saddr >= eaddr ||
|
||||
(bo && offset + size > amdgpu_bo_size(bo)) ||
|
||||
if ((bo && offset + size > amdgpu_bo_size(bo)) ||
|
||||
(eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -2067,7 +2068,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
|
|||
vm->update_funcs = &amdgpu_vm_cpu_funcs;
|
||||
else
|
||||
vm->update_funcs = &amdgpu_vm_sdma_funcs;
|
||||
vm->last_update = NULL;
|
||||
|
||||
vm->last_update = dma_fence_get_stub();
|
||||
vm->last_unlocked = dma_fence_get_stub();
|
||||
vm->last_tlb_flush = dma_fence_get_stub();
|
||||
|
||||
|
@ -2192,7 +2194,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
|
|||
goto unreserve_bo;
|
||||
|
||||
dma_fence_put(vm->last_update);
|
||||
vm->last_update = NULL;
|
||||
vm->last_update = dma_fence_get_stub();
|
||||
vm->is_compute_context = true;
|
||||
|
||||
/* Free the shadow bo for compute VM */
|
||||
|
@ -2282,8 +2284,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
|
|||
}
|
||||
|
||||
dma_fence_put(vm->last_update);
|
||||
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
|
||||
amdgpu_vmid_free_reserved(adev, vm, i);
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
|
||||
if (vm->reserved_vmid[i]) {
|
||||
amdgpu_vmid_free_reserved(adev, i);
|
||||
vm->reserved_vmid[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2366,18 +2374,25 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
|||
union drm_amdgpu_vm *args = data;
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
struct amdgpu_fpriv *fpriv = filp->driver_priv;
|
||||
int r;
|
||||
|
||||
/* No valid flags defined yet */
|
||||
if (args->in.flags)
|
||||
return -EINVAL;
|
||||
|
||||
switch (args->in.op) {
|
||||
case AMDGPU_VM_OP_RESERVE_VMID:
|
||||
/* We only have requirement to reserve vmid from gfxhub */
|
||||
r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
|
||||
AMDGPU_GFXHUB_0);
|
||||
if (r)
|
||||
return r;
|
||||
if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
|
||||
amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
|
||||
fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true;
|
||||
}
|
||||
|
||||
break;
|
||||
case AMDGPU_VM_OP_UNRESERVE_VMID:
|
||||
amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
|
||||
if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
|
||||
amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0));
|
||||
fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
|
@ -2432,6 +2447,9 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
|
|||
* amdgpu_vm_handle_fault - graceful handling of VM faults.
|
||||
* @adev: amdgpu device pointer
|
||||
* @pasid: PASID of the VM
|
||||
* @vmid: VMID, only used for GFX 9.4.3.
|
||||
* @node_id: Node_id received in IH cookie. Only applicable for
|
||||
* GFX 9.4.3.
|
||||
* @addr: Address of the fault
|
||||
* @write_fault: true is write fault, false is read fault
|
||||
*
|
||||
|
@ -2439,7 +2457,8 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
|
|||
* shouldn't be reported any more.
|
||||
*/
|
||||
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
|
||||
uint64_t addr, bool write_fault)
|
||||
u32 vmid, u32 node_id, uint64_t addr,
|
||||
bool write_fault)
|
||||
{
|
||||
bool is_compute_context = false;
|
||||
struct amdgpu_bo *root;
|
||||
|
@ -2463,8 +2482,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
|
|||
|
||||
addr /= AMDGPU_GPU_PAGE_SIZE;
|
||||
|
||||
if (is_compute_context &&
|
||||
!svm_range_restore_pages(adev, pasid, addr, write_fault)) {
|
||||
if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
|
||||
node_id, addr, write_fault)) {
|
||||
amdgpu_bo_unref(&root);
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -111,11 +111,14 @@ struct amdgpu_mem_stats;
/* Reserve 4MB VRAM for page tables */
#define AMDGPU_VM_RESERVED_VRAM (8ULL << 20)

/* max number of VMHUB */
#define AMDGPU_MAX_VMHUBS 3
#define AMDGPU_GFXHUB_0 0
#define AMDGPU_MMHUB_0 1
#define AMDGPU_MMHUB_1 2
/*
* max number of VMHUB
* layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1
*/
#define AMDGPU_MAX_VMHUBS 13
#define AMDGPU_GFXHUB(x) (x)
#define AMDGPU_MMHUB0(x) (8 + x)
#define AMDGPU_MMHUB1(x) (8 + 4 + x)

/* Reserve 2MB at top/bottom of address space for kernel use */
#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20)
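A quick illustration of how the new per-partition hub indices above resolve (editorial, not part of the commit): GFXHUB instances occupy slots 0..7, MMHUB0 instances slots 8..11, and the single MMHUB1 instance slot 12, which is what AMDGPU_MAX_VMHUBS == 13 accounts for. A minimal sketch, assuming only the defines shown above:

#include <linux/build_bug.h>

/* Illustrative only: check the slot layout implied by the macros above. */
static_assert(AMDGPU_GFXHUB(0) == 0);
static_assert(AMDGPU_GFXHUB(7) == 7);
static_assert(AMDGPU_MMHUB0(0) == 8);
static_assert(AMDGPU_MMHUB0(3) == 11);
static_assert(AMDGPU_MMHUB1(0) == 12);
static_assert(AMDGPU_MMHUB1(0) < AMDGPU_MAX_VMHUBS);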
@ -326,6 +329,9 @@ struct amdgpu_vm {
|
|||
struct ttm_lru_bulk_move lru_bulk_move;
|
||||
/* Flag to indicate if VM is used for compute */
|
||||
bool is_compute_context;
|
||||
|
||||
/* Memory partition number, -1 means any partition */
|
||||
int8_t mem_id;
|
||||
};
|
||||
|
||||
struct amdgpu_vm_manager {
|
||||
|
@ -452,7 +458,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
|
|||
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
|
||||
struct amdgpu_task_info *task_info);
|
||||
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
|
||||
uint64_t addr, bool write_fault);
|
||||
u32 vmid, u32 node_id, uint64_t addr,
|
||||
bool write_fault);
|
||||
|
||||
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
|
||||
|
||||
|
|
|
@ -502,6 +502,7 @@ exit:
|
|||
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
int level, bool immediate, struct amdgpu_bo_vm **vmbo)
|
||||
{
|
||||
struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, vm);
|
||||
struct amdgpu_bo_param bp;
|
||||
struct amdgpu_bo *bo;
|
||||
struct dma_resv *resv;
|
||||
|
@ -512,7 +513,12 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|||
|
||||
bp.size = amdgpu_vm_pt_size(adev, level);
|
||||
bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
|
||||
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
|
||||
|
||||
if (!adev->gmc.is_app_apu)
|
||||
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
|
||||
else
|
||||
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
|
||||
|
||||
bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
|
||||
bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
|
||||
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
|
||||
|
@ -529,6 +535,8 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|||
|
||||
bp.type = ttm_bo_type_kernel;
|
||||
bp.no_wait_gpu = immediate;
|
||||
bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
|
||||
|
||||
if (vm->root.bo)
|
||||
bp.resv = vm->root.bo->tbo.base.resv;
|
||||
|
||||
|
@ -553,6 +561,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|||
bp.type = ttm_bo_type_kernel;
|
||||
bp.resv = bo->tbo.base.resv;
|
||||
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
|
||||
bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
|
||||
|
||||
r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
|
||||
|
||||
|
@ -564,7 +573,6 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|||
return r;
|
||||
}
|
||||
|
||||
(*vmbo)->shadow->parent = amdgpu_bo_ref(bo);
|
||||
amdgpu_bo_add_to_shadow_list(*vmbo);
|
||||
|
||||
return 0;
|
||||
|
@ -781,13 +789,14 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
|
|||
uint64_t pe, uint64_t addr,
|
||||
unsigned int count, uint32_t incr,
|
||||
uint64_t flags)
|
||||
|
||||
{
|
||||
struct amdgpu_device *adev = params->adev;
|
||||
|
||||
if (level != AMDGPU_VM_PTB) {
|
||||
flags |= AMDGPU_PDE_PTE;
|
||||
amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
|
||||
amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
|
||||
|
||||
} else if (params->adev->asic_type >= CHIP_VEGA10 &&
|
||||
} else if (adev->asic_type >= CHIP_VEGA10 &&
|
||||
!(flags & AMDGPU_PTE_VALID) &&
|
||||
!(flags & AMDGPU_PTE_PRT)) {
|
||||
|
||||
|
@ -795,6 +804,21 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
|
|||
flags |= AMDGPU_PTE_EXECUTABLE;
|
||||
}
|
||||
|
||||
/* APUs mapping system memory may need different MTYPEs on different
|
||||
* NUMA nodes. Only do this for contiguous ranges that can be assumed
|
||||
* to be on the same NUMA node.
|
||||
*/
|
||||
if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
|
||||
adev->gmc.gmc_funcs->override_vm_pte_flags &&
|
||||
num_possible_nodes() > 1) {
|
||||
if (!params->pages_addr)
|
||||
amdgpu_gmc_override_vm_pte_flags(adev, params->vm,
|
||||
addr, &flags);
|
||||
else
|
||||
dev_dbg(adev->dev,
|
||||
"override_vm_pte_flags skipped: non-contiguous\n");
|
||||
}
|
||||
|
||||
params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
|
||||
flags);
|
||||
}
|
||||
|
|
|
@ -370,6 +370,45 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void amdgpu_dummy_vram_mgr_debug(struct ttm_resource_manager *man,
|
||||
struct drm_printer *printer)
|
||||
{
|
||||
DRM_DEBUG_DRIVER("Dummy vram mgr debug\n");
|
||||
}
|
||||
|
||||
static bool amdgpu_dummy_vram_mgr_compatible(struct ttm_resource_manager *man,
|
||||
struct ttm_resource *res,
|
||||
const struct ttm_place *place,
|
||||
size_t size)
|
||||
{
|
||||
DRM_DEBUG_DRIVER("Dummy vram mgr compatible\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool amdgpu_dummy_vram_mgr_intersects(struct ttm_resource_manager *man,
|
||||
struct ttm_resource *res,
|
||||
const struct ttm_place *place,
|
||||
size_t size)
|
||||
{
|
||||
DRM_DEBUG_DRIVER("Dummy vram mgr intersects\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
static void amdgpu_dummy_vram_mgr_del(struct ttm_resource_manager *man,
|
||||
struct ttm_resource *res)
|
||||
{
|
||||
DRM_DEBUG_DRIVER("Dummy vram mgr deleted\n");
|
||||
}
|
||||
|
||||
static int amdgpu_dummy_vram_mgr_new(struct ttm_resource_manager *man,
|
||||
struct ttm_buffer_object *tbo,
|
||||
const struct ttm_place *place,
|
||||
struct ttm_resource **res)
|
||||
{
|
||||
DRM_DEBUG_DRIVER("Dummy vram mgr new\n");
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_vram_mgr_new - allocate new ranges
|
||||
*
|
||||
|
@ -800,7 +839,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
|
|||
{
|
||||
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
|
||||
struct drm_buddy *mm = &mgr->mm;
|
||||
struct drm_buddy_block *block;
|
||||
struct amdgpu_vram_reservation *rsv;
|
||||
|
||||
drm_printf(printer, " vis usage:%llu\n",
|
||||
amdgpu_vram_mgr_vis_usage(mgr));
|
||||
|
@ -812,11 +851,20 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
|
|||
drm_buddy_print(mm, printer);
|
||||
|
||||
drm_printf(printer, "reserved:\n");
|
||||
list_for_each_entry(block, &mgr->reserved_pages, link)
|
||||
drm_buddy_block_print(mm, block, printer);
|
||||
list_for_each_entry(rsv, &mgr->reserved_pages, blocks)
|
||||
drm_printf(printer, "%#018llx-%#018llx: %llu\n",
|
||||
rsv->start, rsv->start + rsv->size, rsv->size);
|
||||
mutex_unlock(&mgr->lock);
|
||||
}
|
||||
|
||||
static const struct ttm_resource_manager_func amdgpu_dummy_vram_mgr_func = {
|
||||
.alloc = amdgpu_dummy_vram_mgr_new,
|
||||
.free = amdgpu_dummy_vram_mgr_del,
|
||||
.intersects = amdgpu_dummy_vram_mgr_intersects,
|
||||
.compatible = amdgpu_dummy_vram_mgr_compatible,
|
||||
.debug = amdgpu_dummy_vram_mgr_debug
|
||||
};
|
||||
|
||||
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
|
||||
.alloc = amdgpu_vram_mgr_new,
|
||||
.free = amdgpu_vram_mgr_del,
|
||||
|
@ -841,17 +889,22 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
|
|||
ttm_resource_manager_init(man, &adev->mman.bdev,
|
||||
adev->gmc.real_vram_size);
|
||||
|
||||
man->func = &amdgpu_vram_mgr_func;
|
||||
|
||||
err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
mutex_init(&mgr->lock);
|
||||
INIT_LIST_HEAD(&mgr->reservations_pending);
|
||||
INIT_LIST_HEAD(&mgr->reserved_pages);
|
||||
mgr->default_page_size = PAGE_SIZE;
|
||||
|
||||
if (!adev->gmc.is_app_apu) {
|
||||
man->func = &amdgpu_vram_mgr_func;
|
||||
|
||||
err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
|
||||
if (err)
|
||||
return err;
|
||||
} else {
|
||||
man->func = &amdgpu_dummy_vram_mgr_func;
|
||||
DRM_INFO("Setup dummy vram mgr\n");
|
||||
}
|
||||
|
||||
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
|
||||
ttm_resource_manager_set_used(man, true);
|
||||
return 0;
|
||||
|
@ -886,7 +939,8 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
|
|||
drm_buddy_free_list(&mgr->mm, &rsv->allocated);
|
||||
kfree(rsv);
|
||||
}
|
||||
drm_buddy_fini(&mgr->mm);
|
||||
if (!adev->gmc.is_app_apu)
|
||||
drm_buddy_fini(&mgr->mm);
|
||||
mutex_unlock(&mgr->lock);
|
||||
|
||||
ttm_resource_manager_cleanup(man);
|
||||
|
|
|
@ -0,0 +1,399 @@
|
|||
/*
|
||||
* Copyright 2022 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_xcp.h"
|
||||
#include "amdgpu_drv.h"
|
||||
|
||||
#include <drm/drm_drv.h>
|
||||
#include "../amdxcp/amdgpu_xcp_drv.h"
|
||||
|
||||
static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr,
|
||||
struct amdgpu_xcp_ip *xcp_ip, int xcp_state)
|
||||
{
|
||||
int (*run_func)(void *handle, uint32_t inst_mask);
|
||||
int ret = 0;
|
||||
|
||||
if (!xcp_ip || !xcp_ip->valid || !xcp_ip->ip_funcs)
|
||||
return 0;
|
||||
|
||||
run_func = NULL;
|
||||
|
||||
switch (xcp_state) {
|
||||
case AMDGPU_XCP_PREPARE_SUSPEND:
|
||||
run_func = xcp_ip->ip_funcs->prepare_suspend;
|
||||
break;
|
||||
case AMDGPU_XCP_SUSPEND:
|
||||
run_func = xcp_ip->ip_funcs->suspend;
|
||||
break;
|
||||
case AMDGPU_XCP_PREPARE_RESUME:
|
||||
run_func = xcp_ip->ip_funcs->prepare_resume;
|
||||
break;
|
||||
case AMDGPU_XCP_RESUME:
|
||||
run_func = xcp_ip->ip_funcs->resume;
|
||||
break;
|
||||
}
|
||||
|
||||
if (run_func)
|
||||
ret = run_func(xcp_mgr->adev, xcp_ip->inst_mask);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int amdgpu_xcp_run_transition(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
|
||||
int state)
|
||||
{
|
||||
struct amdgpu_xcp_ip *xcp_ip;
|
||||
struct amdgpu_xcp *xcp;
|
||||
int i, ret;
|
||||
|
||||
if (xcp_id >= MAX_XCP || !xcp_mgr->xcp[xcp_id].valid)
|
||||
return -EINVAL;
|
||||
|
||||
xcp = &xcp_mgr->xcp[xcp_id];
|
||||
for (i = 0; i < AMDGPU_XCP_MAX_BLOCKS; ++i) {
|
||||
xcp_ip = &xcp->ip[i];
|
||||
ret = __amdgpu_xcp_run(xcp_mgr, xcp_ip, state);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
|
||||
{
|
||||
return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
|
||||
AMDGPU_XCP_PREPARE_SUSPEND);
|
||||
}
|
||||
|
||||
int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
|
||||
{
|
||||
return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_SUSPEND);
|
||||
}
|
||||
|
||||
int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
|
||||
{
|
||||
return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
|
||||
AMDGPU_XCP_PREPARE_RESUME);
|
||||
}
|
||||
|
||||
int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
|
||||
{
|
||||
return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_RESUME);
|
||||
}
|
||||
|
||||
static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
|
||||
struct amdgpu_xcp_ip *ip)
|
||||
{
|
||||
struct amdgpu_xcp *xcp;
|
||||
|
||||
if (!ip)
|
||||
return;
|
||||
|
||||
xcp = &xcp_mgr->xcp[xcp_id];
|
||||
xcp->ip[ip->ip_id] = *ip;
|
||||
xcp->ip[ip->ip_id].valid = true;
|
||||
|
||||
xcp->valid = true;
|
||||
}
|
||||
|
||||
int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode)
|
||||
{
|
||||
struct amdgpu_device *adev = xcp_mgr->adev;
|
||||
struct amdgpu_xcp_ip ip;
|
||||
uint8_t mem_id;
|
||||
int i, j, ret;
|
||||
|
||||
if (!num_xcps || num_xcps > MAX_XCP)
|
||||
return -EINVAL;
|
||||
|
||||
xcp_mgr->mode = mode;
|
||||
|
||||
for (i = 0; i < MAX_XCP; ++i)
|
||||
xcp_mgr->xcp[i].valid = false;
|
||||
|
||||
for (i = 0; i < num_xcps; ++i) {
|
||||
for (j = AMDGPU_XCP_GFXHUB; j < AMDGPU_XCP_MAX_BLOCKS; ++j) {
|
||||
ret = xcp_mgr->funcs->get_ip_details(xcp_mgr, i, j,
|
||||
&ip);
|
||||
if (ret)
|
||||
continue;
|
||||
|
||||
__amdgpu_xcp_add_block(xcp_mgr, i, &ip);
|
||||
}
|
||||
|
||||
xcp_mgr->xcp[i].id = i;
|
||||
|
||||
if (xcp_mgr->funcs->get_xcp_mem_id) {
|
||||
ret = xcp_mgr->funcs->get_xcp_mem_id(
|
||||
xcp_mgr, &xcp_mgr->xcp[i], &mem_id);
|
||||
if (ret)
|
||||
continue;
|
||||
else
|
||||
xcp_mgr->xcp[i].mem_id = mem_id;
|
||||
}
|
||||
}
|
||||
|
||||
xcp_mgr->num_xcps = num_xcps;
|
||||
amdgpu_xcp_update_partition_sched_list(adev);
|
||||
|
||||
xcp_mgr->num_xcp_per_mem_partition = num_xcps / xcp_mgr->adev->gmc.num_mem_partitions;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
|
||||
{
|
||||
int ret, curr_mode, num_xcps = 0;
|
||||
|
||||
if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE)
|
||||
return -EINVAL;
|
||||
|
||||
if (xcp_mgr->mode == mode)
|
||||
return 0;
|
||||
|
||||
if (!xcp_mgr->funcs || !xcp_mgr->funcs->switch_partition_mode)
|
||||
return 0;
|
||||
|
||||
mutex_lock(&xcp_mgr->xcp_lock);
|
||||
|
||||
curr_mode = xcp_mgr->mode;
|
||||
/* State set to transient mode */
|
||||
xcp_mgr->mode = AMDGPU_XCP_MODE_TRANS;
|
||||
|
||||
ret = xcp_mgr->funcs->switch_partition_mode(xcp_mgr, mode, &num_xcps);
|
||||
|
||||
if (ret) {
|
||||
/* Failed, get whatever mode it's at now */
|
||||
if (xcp_mgr->funcs->query_partition_mode)
|
||||
xcp_mgr->mode = amdgpu_xcp_query_partition_mode(
|
||||
xcp_mgr, AMDGPU_XCP_FL_LOCKED);
|
||||
else
|
||||
xcp_mgr->mode = curr_mode;
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&xcp_mgr->xcp_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
|
||||
{
|
||||
int mode;
|
||||
|
||||
if (xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
|
||||
return xcp_mgr->mode;
|
||||
|
||||
if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode)
|
||||
return xcp_mgr->mode;
|
||||
|
||||
if (!(flags & AMDGPU_XCP_FL_LOCKED))
|
||||
mutex_lock(&xcp_mgr->xcp_lock);
|
||||
mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
|
||||
if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode)
|
||||
dev_WARN(
|
||||
xcp_mgr->adev->dev,
|
||||
"Cached partition mode %d not matching with device mode %d",
|
||||
xcp_mgr->mode, mode);
|
||||
|
||||
if (!(flags & AMDGPU_XCP_FL_LOCKED))
|
||||
mutex_unlock(&xcp_mgr->xcp_lock);
|
||||
|
||||
return mode;
|
||||
}
|
||||
|
||||
static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
|
||||
{
|
||||
struct drm_device *p_ddev;
|
||||
struct drm_device *ddev;
|
||||
int i, ret;
|
||||
|
||||
ddev = adev_to_drm(adev);
|
||||
|
||||
for (i = 0; i < MAX_XCP; i++) {
|
||||
ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Redirect all IOCTLs to the primary device */
|
||||
adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
|
||||
adev->xcp_mgr->xcp[i].pdev = p_ddev->primary->dev;
|
||||
adev->xcp_mgr->xcp[i].driver = (struct drm_driver *)p_ddev->driver;
|
||||
adev->xcp_mgr->xcp[i].vma_offset_manager = p_ddev->vma_offset_manager;
|
||||
p_ddev->render->dev = ddev;
|
||||
p_ddev->primary->dev = ddev;
|
||||
p_ddev->vma_offset_manager = ddev->vma_offset_manager;
|
||||
p_ddev->driver = &amdgpu_partition_driver;
|
||||
adev->xcp_mgr->xcp[i].ddev = p_ddev;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
|
||||
int init_num_xcps,
|
||||
struct amdgpu_xcp_mgr_funcs *xcp_funcs)
|
||||
{
|
||||
struct amdgpu_xcp_mgr *xcp_mgr;
|
||||
|
||||
if (!xcp_funcs || !xcp_funcs->switch_partition_mode ||
|
||||
!xcp_funcs->get_ip_details)
|
||||
return -EINVAL;
|
||||
|
||||
xcp_mgr = kzalloc(sizeof(*xcp_mgr), GFP_KERNEL);
|
||||
|
||||
if (!xcp_mgr)
|
||||
return -ENOMEM;
|
||||
|
||||
xcp_mgr->adev = adev;
|
||||
xcp_mgr->funcs = xcp_funcs;
|
||||
xcp_mgr->mode = init_mode;
|
||||
mutex_init(&xcp_mgr->xcp_lock);
|
||||
|
||||
if (init_mode != AMDGPU_XCP_MODE_NONE)
|
||||
amdgpu_xcp_init(xcp_mgr, init_num_xcps, init_mode);
|
||||
|
||||
adev->xcp_mgr = xcp_mgr;
|
||||
|
||||
return amdgpu_xcp_dev_alloc(adev);
|
||||
}
|
||||
|
||||
int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
|
||||
enum AMDGPU_XCP_IP_BLOCK ip, int instance)
|
||||
{
|
||||
struct amdgpu_xcp *xcp;
|
||||
int i, id_mask = 0;
|
||||
|
||||
if (ip >= AMDGPU_XCP_MAX_BLOCKS)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < xcp_mgr->num_xcps; ++i) {
|
||||
xcp = &xcp_mgr->xcp[i];
|
||||
if ((xcp->valid) && (xcp->ip[ip].valid) &&
|
||||
(xcp->ip[ip].inst_mask & BIT(instance)))
|
||||
id_mask |= BIT(i);
|
||||
}
|
||||
|
||||
if (!id_mask)
|
||||
id_mask = -ENXIO;
|
||||
|
||||
return id_mask;
|
||||
}
|
||||
|
||||
int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
|
||||
enum AMDGPU_XCP_IP_BLOCK ip,
|
||||
uint32_t *inst_mask)
|
||||
{
|
||||
if (!xcp->valid || !inst_mask || !(xcp->ip[ip].valid))
|
||||
return -EINVAL;
|
||||
|
||||
*inst_mask = xcp->ip[ip].inst_mask;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
|
||||
const struct pci_device_id *ent)
|
||||
{
|
||||
int i, ret;
|
||||
|
||||
if (!adev->xcp_mgr)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < MAX_XCP; i++) {
|
||||
ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
|
||||
{
|
||||
struct drm_device *p_ddev;
|
||||
int i;
|
||||
|
||||
if (!adev->xcp_mgr)
|
||||
return;
|
||||
|
||||
for (i = 0; i < MAX_XCP; i++) {
|
||||
p_ddev = adev->xcp_mgr->xcp[i].ddev;
|
||||
drm_dev_unplug(p_ddev);
|
||||
p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
|
||||
p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev;
|
||||
p_ddev->driver = adev->xcp_mgr->xcp[i].driver;
|
||||
p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager;
|
||||
}
|
||||
}
|
||||
|
||||
int amdgpu_xcp_open_device(struct amdgpu_device *adev,
|
||||
struct amdgpu_fpriv *fpriv,
|
||||
struct drm_file *file_priv)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!adev->xcp_mgr)
|
||||
return 0;
|
||||
|
||||
fpriv->xcp_id = ~0;
|
||||
for (i = 0; i < MAX_XCP; ++i) {
|
||||
if (!adev->xcp_mgr->xcp[i].ddev)
|
||||
break;
|
||||
|
||||
if (file_priv->minor == adev->xcp_mgr->xcp[i].ddev->render) {
|
||||
if (adev->xcp_mgr->xcp[i].valid == FALSE) {
|
||||
dev_err(adev->dev, "renderD%d partition %d not valid!",
|
||||
file_priv->minor->index, i);
|
||||
return -ENOENT;
|
||||
}
|
||||
dev_dbg(adev->dev, "renderD%d partition %d opened!",
|
||||
file_priv->minor->index, i);
|
||||
fpriv->xcp_id = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fpriv->vm.mem_id = fpriv->xcp_id == ~0 ? -1 :
|
||||
adev->xcp_mgr->xcp[fpriv->xcp_id].mem_id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
|
||||
struct amdgpu_ctx_entity *entity)
|
||||
{
|
||||
struct drm_gpu_scheduler *sched;
|
||||
struct amdgpu_ring *ring;
|
||||
|
||||
if (!adev->xcp_mgr)
|
||||
return;
|
||||
|
||||
sched = entity->entity.rq->sched;
|
||||
if (sched->ready) {
|
||||
ring = to_amdgpu_ring(entity->entity.rq->sched);
|
||||
atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt);
|
||||
}
|
||||
}
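For orientation (editorial, not part of the commit): the per-ASIC glue hands amdgpu_xcp_mgr_init() a filled-in struct amdgpu_xcp_mgr_funcs, declared in the new amdgpu_xcp.h below, and the init helper rejects tables that lack switch_partition_mode or get_ip_details. A minimal sketch under that assumption; the my_* names are placeholders, not symbols from this series:

/* Hypothetical wiring, assuming only the signatures declared in amdgpu_xcp.h. */
static int my_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
				    int mode, int *num_xcps)
{
	*num_xcps = 1;	/* placeholder: report a single partition */
	return 0;
}

static int my_get_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
			     enum AMDGPU_XCP_IP_BLOCK ip_id,
			     struct amdgpu_xcp_ip *ip)
{
	return -EINVAL;	/* placeholder: no IP details published */
}

static struct amdgpu_xcp_mgr_funcs my_xcp_funcs = {
	.switch_partition_mode = my_switch_partition_mode,
	.get_ip_details = my_get_ip_details,
};

static int my_xcp_setup(struct amdgpu_device *adev)
{
	/* AMDGPU_XCP_MODE_NONE skips amdgpu_xcp_init() inside the call; a later
	 * amdgpu_xcp_switch_partition_mode() picks the real mode. */
	return amdgpu_xcp_mgr_init(adev, AMDGPU_XCP_MODE_NONE, 1, &my_xcp_funcs);
}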
@ -0,0 +1,182 @@
|
|||
/*
|
||||
* Copyright 2022 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef AMDGPU_XCP_H
|
||||
#define AMDGPU_XCP_H
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/xarray.h>
|
||||
|
||||
#include "amdgpu_ctx.h"
|
||||
|
||||
#define MAX_XCP 8
|
||||
|
||||
#define AMDGPU_XCP_MODE_NONE -1
|
||||
#define AMDGPU_XCP_MODE_TRANS -2
|
||||
|
||||
#define AMDGPU_XCP_FL_NONE 0
|
||||
#define AMDGPU_XCP_FL_LOCKED (1 << 0)
|
||||
|
||||
struct amdgpu_fpriv;
|
||||
|
||||
enum AMDGPU_XCP_IP_BLOCK {
|
||||
AMDGPU_XCP_GFXHUB,
|
||||
AMDGPU_XCP_GFX,
|
||||
AMDGPU_XCP_SDMA,
|
||||
AMDGPU_XCP_VCN,
|
||||
AMDGPU_XCP_MAX_BLOCKS
|
||||
};
|
||||
|
||||
enum AMDGPU_XCP_STATE {
|
||||
AMDGPU_XCP_PREPARE_SUSPEND,
|
||||
AMDGPU_XCP_SUSPEND,
|
||||
AMDGPU_XCP_PREPARE_RESUME,
|
||||
AMDGPU_XCP_RESUME,
|
||||
};
|
||||
|
||||
struct amdgpu_xcp_ip_funcs {
|
||||
int (*prepare_suspend)(void *handle, uint32_t inst_mask);
|
||||
int (*suspend)(void *handle, uint32_t inst_mask);
|
||||
int (*prepare_resume)(void *handle, uint32_t inst_mask);
|
||||
int (*resume)(void *handle, uint32_t inst_mask);
};

struct amdgpu_xcp_ip {
    struct amdgpu_xcp_ip_funcs *ip_funcs;
    uint32_t inst_mask;

    enum AMDGPU_XCP_IP_BLOCK ip_id;
    bool valid;
};

struct amdgpu_xcp {
    struct amdgpu_xcp_ip ip[AMDGPU_XCP_MAX_BLOCKS];

    uint8_t id;
    uint8_t mem_id;
    bool valid;
    atomic_t ref_cnt;
    struct drm_device *ddev;
    struct drm_device *rdev;
    struct drm_device *pdev;
    struct drm_driver *driver;
    struct drm_vma_offset_manager *vma_offset_manager;
    struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
};

struct amdgpu_xcp_mgr {
    struct amdgpu_device *adev;
    struct mutex xcp_lock;
    struct amdgpu_xcp_mgr_funcs *funcs;

    struct amdgpu_xcp xcp[MAX_XCP];
    uint8_t num_xcps;
    int8_t mode;

    /* Used to determine KFD memory size limits per XCP */
    unsigned int num_xcp_per_mem_partition;
};

struct amdgpu_xcp_mgr_funcs {
    int (*switch_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr, int mode,
            int *num_xcps);
    int (*query_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr);
    int (*get_ip_details)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
            enum AMDGPU_XCP_IP_BLOCK ip_id,
            struct amdgpu_xcp_ip *ip);
    int (*get_xcp_mem_id)(struct amdgpu_xcp_mgr *xcp_mgr,
            struct amdgpu_xcp *xcp, uint8_t *mem_id);

    int (*prepare_suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
    int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
    int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
    int (*resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
    int (*select_scheds)(struct amdgpu_device *adev,
            u32 hw_ip, u32 hw_prio, struct amdgpu_fpriv *fpriv,
            unsigned int *num_scheds, struct drm_gpu_scheduler ***scheds);
    int (*update_partition_sched_list)(struct amdgpu_device *adev);
};

int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);

int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
        int init_xcps, struct amdgpu_xcp_mgr_funcs *xcp_funcs);
int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode);
int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode);
int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
        enum AMDGPU_XCP_IP_BLOCK ip, int instance);

int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
        enum AMDGPU_XCP_IP_BLOCK ip,
        uint32_t *inst_mask);

int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
        const struct pci_device_id *ent);
void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev);
int amdgpu_xcp_open_device(struct amdgpu_device *adev,
        struct amdgpu_fpriv *fpriv,
        struct drm_file *file_priv);
void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
        struct amdgpu_ctx_entity *entity);

#define amdgpu_xcp_select_scheds(adev, e, c, d, x, y) \
    ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \
    (adev)->xcp_mgr->funcs->select_scheds ? \
    (adev)->xcp_mgr->funcs->select_scheds((adev), (e), (c), (d), (x), (y)) : -ENOENT)
#define amdgpu_xcp_update_partition_sched_list(adev) \
    ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \
    (adev)->xcp_mgr->funcs->update_partition_sched_list ? \
    (adev)->xcp_mgr->funcs->update_partition_sched_list(adev) : 0)

static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr)
{
    if (!xcp_mgr)
        return 1;
    else
        return xcp_mgr->num_xcps;
}

static inline struct amdgpu_xcp *
amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
{
    if (!xcp_mgr)
        return NULL;

    while (*from < MAX_XCP) {
        if (xcp_mgr->xcp[*from].valid)
            return &xcp_mgr->xcp[*from];
        ++(*from);
    }

    return NULL;
}

#define for_each_xcp(xcp_mgr, xcp, i) \
    for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
         xcp = amdgpu_get_next_xcp(xcp_mgr, &i))

#endif
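
As a usage sketch only (not part of this patch), a caller can walk the valid partitions with the for_each_xcp() iterator and query per-IP instance masks via amdgpu_xcp_get_inst_details() declared above; the helper name and the debug print below are assumptions, and the usual amdgpu headers are presumed to be included:

/* Illustrative only: walk valid partitions and log their GFX instance masks. */
static void example_dump_xcp_gfx_masks(struct amdgpu_xcp_mgr *xcp_mgr)
{
    struct amdgpu_xcp *xcp;
    uint32_t inst_mask;
    int i;

    for_each_xcp(xcp_mgr, xcp, i) {
        if (!amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask))
            DRM_DEBUG("xcp %d: GFX inst_mask 0x%x\n", xcp->id, inst_mask);
    }
}
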
@@ -1014,7 +1014,8 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 }
 
 /* Trigger XGMI/WAFL error */
-static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if)
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
+        void *inject_if, uint32_t instance_mask)
 {
     int ret = 0;
     struct ta_ras_trigger_error_input *block_info =
@@ -1026,7 +1027,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *injec
     if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
         dev_warn(adev->dev, "Failed to disallow XGMI power down");
 
-    ret = psp_ras_trigger_error(&adev->psp, block_info);
+    ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
 
     if (amdgpu_ras_intr_triggered())
         return ret;

@ -70,7 +70,6 @@ enum amd_sriov_ucode_engine_id {
|
|||
AMD_SRIOV_UCODE_ID_RLC_SRLS,
|
||||
AMD_SRIOV_UCODE_ID_MEC,
|
||||
AMD_SRIOV_UCODE_ID_MEC2,
|
||||
AMD_SRIOV_UCODE_ID_IMU,
|
||||
AMD_SRIOV_UCODE_ID_SOS,
|
||||
AMD_SRIOV_UCODE_ID_ASD,
|
||||
AMD_SRIOV_UCODE_ID_TA_RAS,
|
||||
|
|
|
@ -0,0 +1,661 @@
|
|||
/*
|
||||
* Copyright 2022 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include "amdgpu.h"
|
||||
#include "soc15.h"
|
||||
|
||||
#include "soc15_common.h"
|
||||
#include "amdgpu_xcp.h"
|
||||
#include "gfx_v9_4_3.h"
|
||||
#include "gfxhub_v1_2.h"
|
||||
#include "sdma_v4_4_2.h"
|
||||
|
||||
#define XCP_INST_MASK(num_inst, xcp_id) \
|
||||
(num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0)
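
For example, with the illustrative values num_inst = 2 and xcp_id = 1 (values assumed for this sketch, not taken from the patch), XCP_INST_MASK selects the two instances owned by the second partition:

/* Illustrative only: GENMASK(1, 0) << (1 * 2) == 0xc, i.e. instances 2 and 3. */
static uint32_t example_xcp1_inst_mask(void)
{
    return XCP_INST_MASK(2, 1);
}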

#define AMDGPU_XCP_OPS_KFD (1 << 0)

void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
{
    int i;

    adev->doorbell_index.kiq = AMDGPU_DOORBELL_LAYOUT1_KIQ_START;

    adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START;

    adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START;
    adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END;
    adev->doorbell_index.xcc_doorbell_range = AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE;

    adev->doorbell_index.sdma_doorbell_range = 20;
    for (i = 0; i < adev->sdma.num_instances; i++)
        adev->doorbell_index.sdma_engine[i] =
            AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START +
            i * (adev->doorbell_index.sdma_doorbell_range >> 1);

    adev->doorbell_index.ih = AMDGPU_DOORBELL_LAYOUT1_IH;
    adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL_LAYOUT1_VCN_START;

    adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP;
    adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP;

    adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
}

static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
        uint32_t inst_idx, struct amdgpu_ring *ring)
{
    int xcp_id;
    enum AMDGPU_XCP_IP_BLOCK ip_blk;
    uint32_t inst_mask;

    ring->xcp_id = ~0;
    if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
        return;

    inst_mask = 1 << inst_idx;

    switch (ring->funcs->type) {
    case AMDGPU_HW_IP_GFX:
    case AMDGPU_RING_TYPE_COMPUTE:
    case AMDGPU_RING_TYPE_KIQ:
        ip_blk = AMDGPU_XCP_GFX;
        break;
    case AMDGPU_RING_TYPE_SDMA:
        ip_blk = AMDGPU_XCP_SDMA;
        break;
    case AMDGPU_RING_TYPE_VCN_ENC:
    case AMDGPU_RING_TYPE_VCN_JPEG:
        ip_blk = AMDGPU_XCP_VCN;
        if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
            inst_mask = 1 << (inst_idx * 2);
        break;
    default:
        DRM_ERROR("Unsupported ring type %d!", ring->funcs->type);
        return;
    }

    for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) {
        if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) {
            ring->xcp_id = xcp_id;
            break;
        }
    }
}

static void aqua_vanjaram_xcp_gpu_sched_update(
        struct amdgpu_device *adev,
        struct amdgpu_ring *ring,
        unsigned int sel_xcp_id)
{
    unsigned int *num_gpu_sched;

    num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id]
            .gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds;
    adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio]
            .sched[(*num_gpu_sched)++] = &ring->sched;
    DRM_DEBUG("%s :[%d] gpu_sched[%d][%d] = %d", ring->name,
            sel_xcp_id, ring->funcs->type,
            ring->hw_prio, *num_gpu_sched);
}

static int aqua_vanjaram_xcp_sched_list_update(
        struct amdgpu_device *adev)
{
    struct amdgpu_ring *ring;
    int i;

    for (i = 0; i < MAX_XCP; i++) {
        atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0);
        memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched));
    }

    if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
        return 0;

    for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
        ring = adev->rings[i];
        if (!ring || !ring->sched.ready)
            continue;

        aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id);

        /* VCN is shared by two partitions under CPX MODE */
        if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
            ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
            adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
            aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
    }

    return 0;
}

static int aqua_vanjaram_update_partition_sched_list(struct amdgpu_device *adev)
{
    int i;

    for (i = 0; i < adev->num_rings; i++) {
        struct amdgpu_ring *ring = adev->rings[i];

        if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ||
            ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
            aqua_vanjaram_set_xcp_id(adev, ring->xcc_id, ring);
        else
            aqua_vanjaram_set_xcp_id(adev, ring->me, ring);
    }

    return aqua_vanjaram_xcp_sched_list_update(adev);
}

static int aqua_vanjaram_select_scheds(
        struct amdgpu_device *adev,
        u32 hw_ip,
        u32 hw_prio,
        struct amdgpu_fpriv *fpriv,
        unsigned int *num_scheds,
        struct drm_gpu_scheduler ***scheds)
{
    u32 sel_xcp_id;
    int i;

    if (fpriv->xcp_id == ~0) {
        u32 least_ref_cnt = ~0;

        fpriv->xcp_id = 0;
        for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
            u32 total_ref_cnt;

            total_ref_cnt = atomic_read(&adev->xcp_mgr->xcp[i].ref_cnt);
            if (total_ref_cnt < least_ref_cnt) {
                fpriv->xcp_id = i;
                least_ref_cnt = total_ref_cnt;
            }
        }
    }
    sel_xcp_id = fpriv->xcp_id;

    if (adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) {
        *num_scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds;
        *scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched;
        atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt);
        DRM_DEBUG("Selected partition #%d", sel_xcp_id);
    } else {
        DRM_ERROR("Failed to schedule partition #%d.", sel_xcp_id);
        return -ENOENT;
    }

    return 0;
}

static int8_t aqua_vanjaram_logical_to_dev_inst(struct amdgpu_device *adev,
        enum amd_hw_ip_block_type block,
        int8_t inst)
{
    int8_t dev_inst;

    switch (block) {
    case GC_HWIP:
    case SDMA0_HWIP:
    /* Both JPEG and VCN as JPEG is only alias of VCN */
    case VCN_HWIP:
        dev_inst = adev->ip_map.dev_inst[block][inst];
        break;
    default:
        /* For rest of the IPs, no look up required.
         * Assume 'logical instance == physical instance' for all configs. */
        dev_inst = inst;
        break;
    }

    return dev_inst;
}

static uint32_t aqua_vanjaram_logical_to_dev_mask(struct amdgpu_device *adev,
        enum amd_hw_ip_block_type block,
        uint32_t mask)
{
    uint32_t dev_mask = 0;
    int8_t log_inst, dev_inst;

    while (mask) {
        log_inst = ffs(mask) - 1;
        dev_inst = aqua_vanjaram_logical_to_dev_inst(adev, block, log_inst);
        dev_mask |= (1 << dev_inst);
        mask &= ~(1 << log_inst);
    }

    return dev_mask;
}

static void aqua_vanjaram_populate_ip_map(struct amdgpu_device *adev,
        enum amd_hw_ip_block_type ip_block,
        uint32_t inst_mask)
{
    int l = 0, i;

    while (inst_mask) {
        i = ffs(inst_mask) - 1;
        adev->ip_map.dev_inst[ip_block][l++] = i;
        inst_mask &= ~(1 << i);
    }
    for (; l < HWIP_MAX_INSTANCE; l++)
        adev->ip_map.dev_inst[ip_block][l] = -1;
}

void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev)
{
    u32 ip_map[][2] = {
        { GC_HWIP, adev->gfx.xcc_mask },
        { SDMA0_HWIP, adev->sdma.sdma_mask },
        { VCN_HWIP, adev->vcn.inst_mask },
    };
    int i;

    for (i = 0; i < ARRAY_SIZE(ip_map); ++i)
        aqua_vanjaram_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]);

    adev->ip_map.logical_to_dev_inst = aqua_vanjaram_logical_to_dev_inst;
    adev->ip_map.logical_to_dev_mask = aqua_vanjaram_logical_to_dev_mask;
}

/* Fixed pattern for smn addressing on different AIDs:
 *   bit[34]: indicate cross AID access
 *   bit[33:32]: indicate target AID id
 * AID id range is 0 ~ 3 as maximum AID number is 4.
 */
u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id)
{
    u64 ext_offset;

    /* local routing and bit[34:32] will be zeros */
    if (ext_id == 0)
        return 0;

    /* Initiated from host, accessing to all non-zero aids are cross traffic */
    ext_offset = ((u64)(ext_id & 0x3) << 32) | (1ULL << 34);

    return ext_offset;
}
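
As a quick sanity check of the encoding described in the comment above (a hypothetical self-test, not part of this patch), the helper yields bit 34 plus the AID id in bits 33:32:

/* Illustrative only: expected extended SMN offsets per AID. */
static void example_check_ext_smn_encoding(void)
{
    WARN_ON(aqua_vanjaram_encode_ext_smn_addressing(0) != 0x0ULL);         /* local routing */
    WARN_ON(aqua_vanjaram_encode_ext_smn_addressing(1) != 0x500000000ULL); /* bit 34 | AID 1 */
    WARN_ON(aqua_vanjaram_encode_ext_smn_addressing(2) != 0x600000000ULL); /* bit 34 | AID 2 */
}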

static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
{
    enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
    struct amdgpu_device *adev = xcp_mgr->adev;

    if (adev->nbio.funcs->get_compute_partition_mode)
        mode = adev->nbio.funcs->get_compute_partition_mode(adev);

    return mode;
}

static int __aqua_vanjaram_get_xcc_per_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
{
    int num_xcc, num_xcc_per_xcp = 0;

    num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);

    switch (mode) {
    case AMDGPU_SPX_PARTITION_MODE:
        num_xcc_per_xcp = num_xcc;
        break;
    case AMDGPU_DPX_PARTITION_MODE:
        num_xcc_per_xcp = num_xcc / 2;
        break;
    case AMDGPU_TPX_PARTITION_MODE:
        num_xcc_per_xcp = num_xcc / 3;
        break;
    case AMDGPU_QPX_PARTITION_MODE:
        num_xcc_per_xcp = num_xcc / 4;
        break;
    case AMDGPU_CPX_PARTITION_MODE:
        num_xcc_per_xcp = 1;
        break;
    }

    return num_xcc_per_xcp;
}

static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
        enum AMDGPU_XCP_IP_BLOCK ip_id,
        struct amdgpu_xcp_ip *ip)
{
    struct amdgpu_device *adev = xcp_mgr->adev;
    int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp;
    int num_sdma, num_vcn;

    num_sdma = adev->sdma.num_instances;
    num_vcn = adev->vcn.num_vcn_inst;

    switch (xcp_mgr->mode) {
    case AMDGPU_SPX_PARTITION_MODE:
        num_sdma_xcp = num_sdma;
        num_vcn_xcp = num_vcn;
        break;
    case AMDGPU_DPX_PARTITION_MODE:
        num_sdma_xcp = num_sdma / 2;
        num_vcn_xcp = num_vcn / 2;
        break;
    case AMDGPU_TPX_PARTITION_MODE:
        num_sdma_xcp = num_sdma / 3;
        num_vcn_xcp = num_vcn / 3;
        break;
    case AMDGPU_QPX_PARTITION_MODE:
        num_sdma_xcp = num_sdma / 4;
        num_vcn_xcp = num_vcn / 4;
        break;
    case AMDGPU_CPX_PARTITION_MODE:
        num_sdma_xcp = 2;
        num_vcn_xcp = num_vcn ? 1 : 0;
        break;
    default:
        return -EINVAL;
    }

    num_xcc_xcp = adev->gfx.num_xcc_per_xcp;

    switch (ip_id) {
    case AMDGPU_XCP_GFXHUB:
        ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
        ip->ip_funcs = &gfxhub_v1_2_xcp_funcs;
        break;
    case AMDGPU_XCP_GFX:
        ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
        ip->ip_funcs = &gfx_v9_4_3_xcp_funcs;
        break;
    case AMDGPU_XCP_SDMA:
        ip->inst_mask = XCP_INST_MASK(num_sdma_xcp, xcp_id);
        ip->ip_funcs = &sdma_v4_4_2_xcp_funcs;
        break;
    case AMDGPU_XCP_VCN:
        ip->inst_mask = XCP_INST_MASK(num_vcn_xcp, xcp_id);
        /* TODO : Assign IP funcs */
        break;
    default:
        return -EINVAL;
    }

    ip->ip_id = ip_id;

    return 0;
}

static enum amdgpu_gfx_partition
__aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
{
    struct amdgpu_device *adev = xcp_mgr->adev;
    int num_xcc;

    num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);

    if (adev->gmc.num_mem_partitions == 1)
        return AMDGPU_SPX_PARTITION_MODE;

    if (adev->gmc.num_mem_partitions == num_xcc)
        return AMDGPU_CPX_PARTITION_MODE;

    if (adev->gmc.num_mem_partitions == num_xcc / 2)
        return (adev->flags & AMD_IS_APU) ? AMDGPU_TPX_PARTITION_MODE :
                AMDGPU_QPX_PARTITION_MODE;

    if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU))
        return AMDGPU_DPX_PARTITION_MODE;

    return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
}

static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
        enum amdgpu_gfx_partition mode)
{
    struct amdgpu_device *adev = xcp_mgr->adev;
    int num_xcc, num_xccs_per_xcp;

    num_xcc = NUM_XCC(adev->gfx.xcc_mask);
    switch (mode) {
    case AMDGPU_SPX_PARTITION_MODE:
        return adev->gmc.num_mem_partitions == 1 && num_xcc > 0;
    case AMDGPU_DPX_PARTITION_MODE:
        return adev->gmc.num_mem_partitions != 8 && (num_xcc % 4) == 0;
    case AMDGPU_TPX_PARTITION_MODE:
        return (adev->gmc.num_mem_partitions == 1 ||
                adev->gmc.num_mem_partitions == 3) &&
                ((num_xcc % 3) == 0);
    case AMDGPU_QPX_PARTITION_MODE:
        num_xccs_per_xcp = num_xcc / 4;
        return (adev->gmc.num_mem_partitions == 1 ||
                adev->gmc.num_mem_partitions == 4) &&
                (num_xccs_per_xcp >= 2);
    case AMDGPU_CPX_PARTITION_MODE:
        return ((num_xcc > 1) &&
                (adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 4) &&
                (num_xcc % adev->gmc.num_mem_partitions) == 0);
    default:
        return false;
    }

    return false;
}

static int __aqua_vanjaram_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
{
    /* TODO:
     * Stop user queues and threads, and make sure GPU is empty of work.
     */

    if (flags & AMDGPU_XCP_OPS_KFD)
        amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev);

    return 0;
}

static int __aqua_vanjaram_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
{
    int ret = 0;

    if (flags & AMDGPU_XCP_OPS_KFD) {
        amdgpu_amdkfd_device_probe(xcp_mgr->adev);
        amdgpu_amdkfd_device_init(xcp_mgr->adev);
        /* If KFD init failed, return failure */
        if (!xcp_mgr->adev->kfd.init_complete)
            ret = -EIO;
    }

    return ret;
}

static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
        int mode, int *num_xcps)
{
    int num_xcc_per_xcp, num_xcc, ret;
    struct amdgpu_device *adev;
    u32 flags = 0;

    adev = xcp_mgr->adev;
    num_xcc = NUM_XCC(adev->gfx.xcc_mask);

    if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) {
        mode = __aqua_vanjaram_get_auto_mode(xcp_mgr);
    } else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) {
        dev_err(adev->dev,
            "Invalid compute partition mode requested, requested: %s, available memory partitions: %d",
            amdgpu_gfx_compute_mode_desc(mode), adev->gmc.num_mem_partitions);
        return -EINVAL;
    }

    if (adev->kfd.init_complete)
        flags |= AMDGPU_XCP_OPS_KFD;

    if (flags & AMDGPU_XCP_OPS_KFD) {
        ret = amdgpu_amdkfd_check_and_lock_kfd(adev);
        if (ret)
            goto out;
    }

    ret = __aqua_vanjaram_pre_partition_switch(xcp_mgr, flags);
    if (ret)
        goto unlock;

    num_xcc_per_xcp = __aqua_vanjaram_get_xcc_per_xcp(xcp_mgr, mode);
    if (adev->gfx.funcs->switch_partition_mode)
        adev->gfx.funcs->switch_partition_mode(xcp_mgr->adev,
                num_xcc_per_xcp);

    if (adev->nbio.funcs->set_compute_partition_mode)
        adev->nbio.funcs->set_compute_partition_mode(adev, mode);

    /* Init info about new xcps */
    *num_xcps = num_xcc / num_xcc_per_xcp;
    amdgpu_xcp_init(xcp_mgr, *num_xcps, mode);

    ret = __aqua_vanjaram_post_partition_switch(xcp_mgr, flags);
unlock:
    if (flags & AMDGPU_XCP_OPS_KFD)
        amdgpu_amdkfd_unlock_kfd(adev);
out:
    return ret;
}

static int __aqua_vanjaram_get_xcp_mem_id(struct amdgpu_device *adev,
        int xcc_id, uint8_t *mem_id)
{
    /* memory/spatial modes validation check is already done */
    *mem_id = xcc_id / adev->gfx.num_xcc_per_xcp;
    *mem_id /= adev->xcp_mgr->num_xcp_per_mem_partition;

    return 0;
}
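
To make the division above concrete, a worked example under assumed numbers (8 XCCs in CPX mode, so num_xcc_per_xcp = 1, and 4 memory partitions, so num_xcp_per_mem_partition = 2; none of these values come from this patch):

/* Illustrative only: XCC 5 sits in XCP 5, which maps to memory partition 2. */
static uint8_t example_mem_id_for_xcc5(void)
{
    uint8_t mem_id;

    mem_id = 5 / 1; /* xcc_id / num_xcc_per_xcp */
    mem_id /= 2;    /* / num_xcp_per_mem_partition */
    return mem_id;  /* == 2 */
}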

static int aqua_vanjaram_get_xcp_mem_id(struct amdgpu_xcp_mgr *xcp_mgr,
        struct amdgpu_xcp *xcp, uint8_t *mem_id)
{
    struct amdgpu_numa_info numa_info;
    struct amdgpu_device *adev;
    uint32_t xcc_mask;
    int r, i, xcc_id;

    adev = xcp_mgr->adev;
    /* TODO: BIOS is not returning the right info now
     * Check on this later
     */
    /*
    if (adev->gmc.gmc_funcs->query_mem_partition_mode)
        mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
    */
    if (adev->gmc.num_mem_partitions == 1) {
        /* Only one range */
        *mem_id = 0;
        return 0;
    }

    r = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &xcc_mask);
    if (r || !xcc_mask)
        return -EINVAL;

    xcc_id = ffs(xcc_mask) - 1;
    if (!adev->gmc.is_app_apu)
        return __aqua_vanjaram_get_xcp_mem_id(adev, xcc_id, mem_id);

    r = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);

    if (r)
        return r;

    r = -EINVAL;
    for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
        if (adev->gmc.mem_partitions[i].numa.node == numa_info.nid) {
            *mem_id = i;
            r = 0;
            break;
        }
    }

    return r;
}

static int aqua_vanjaram_get_xcp_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
        enum AMDGPU_XCP_IP_BLOCK ip_id,
        struct amdgpu_xcp_ip *ip)
{
    if (!ip)
        return -EINVAL;

    return __aqua_vanjaram_get_xcp_ip_info(xcp_mgr, xcp_id, ip_id, ip);
}

struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = {
    .switch_partition_mode = &aqua_vanjaram_switch_partition_mode,
    .query_partition_mode = &aqua_vanjaram_query_partition_mode,
    .get_ip_details = &aqua_vanjaram_get_xcp_ip_details,
    .get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id,
    .select_scheds = &aqua_vanjaram_select_scheds,
    .update_partition_sched_list = &aqua_vanjaram_update_partition_sched_list
};

static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
{
    int ret;

    ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, 1,
            &aqua_vanjaram_xcp_funcs);
    if (ret)
        return ret;

    /* TODO: Default memory node affinity init */

    return ret;
}

int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
{
    u32 mask, inst_mask = adev->sdma.sdma_mask;
    int ret, i;

    /* generally 1 AID supports 4 instances */
    adev->sdma.num_inst_per_aid = 4;
    adev->sdma.num_instances = NUM_SDMA(adev->sdma.sdma_mask);

    adev->aid_mask = i = 1;
    inst_mask >>= adev->sdma.num_inst_per_aid;

    for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask;
         inst_mask >>= adev->sdma.num_inst_per_aid, ++i) {
        if ((inst_mask & mask) == mask)
            adev->aid_mask |= (1 << i);
    }

    /* Harvest config is not used for aqua vanjaram. VCN and JPEGs will be
     * addressed based on logical instance ids.
     */
    adev->vcn.harvest_config = 0;
    adev->vcn.num_inst_per_aid = 1;
    adev->vcn.num_vcn_inst = hweight32(adev->vcn.inst_mask);
    adev->jpeg.harvest_config = 0;
    adev->jpeg.num_inst_per_aid = 1;
    adev->jpeg.num_jpeg_inst = hweight32(adev->jpeg.inst_mask);

    ret = aqua_vanjaram_xcp_mgr_init(adev);
    if (ret)
        return ret;

    aqua_vanjaram_ip_map_init(adev);

    return 0;
}

@@ -1141,12 +1141,12 @@ static uint32_t cik_get_register_value(struct amdgpu_device *adev,
 
         mutex_lock(&adev->grbm_idx_mutex);
         if (se_num != 0xffffffff || sh_num != 0xffffffff)
-            amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+            amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
 
         val = RREG32(reg_offset);
 
         if (se_num != 0xffffffff || sh_num != 0xffffffff)
-            amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+            amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
         mutex_unlock(&adev->grbm_idx_mutex);
         return val;
     } else {