Merge tag 'amd-drm-next-5.8-2020-05-19' of git://people.freedesktop.org/~agd5f/linux into drm-next

amd-drm-next-5.8-2020-05-19:

amdgpu:
- Improved handling for CTF (Critical Thermal Fault) situations
- Clarify AC/DC mode switches
- SR-IOV fixes
- XGMI fixes for RAS
- Misc cleanups
- Add autodump debugfs node to aid in GPU hang debugging

UAPI:
- Add a MEM_SYNC IB flag that lets UMDs ask the kernel to provide proper acquire memory semantics
  Used by AMDVLK: https://github.com/GPUOpen-Drivers/pal/blob/dev/src/core/os/amdgpu/amdgpuQueue.cpp#L1262
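As an illustration, a minimal userspace sketch of setting the new flag through the raw CS UAPI; the chunk/BO-list submission plumbing around it is elided, and fill_ib_chunk is a hypothetical helper, not part of libdrm:

#include <stdint.h>
#include <string.h>
#include <drm/amdgpu_drm.h>	/* struct drm_amdgpu_cs_chunk_ib, AMDGPU_IB_FLAG_* */

/* Hypothetical helper: describe one IB and ask the KMD to flush and
 * invalidate caches (ACQUIRE_MEM) before executing it. */
static void fill_ib_chunk(struct drm_amdgpu_cs_chunk_ib *ib,
			  uint64_t va_start, uint32_t ib_bytes)
{
	memset(ib, 0, sizeof(*ib));
	ib->va_start = va_start;	/* GPU VA of the command buffer */
	ib->ib_bytes = ib_bytes;	/* IB size in bytes */
	ib->ip_type  = AMDGPU_HW_IP_GFX;
	/* tell the KMD to flush and invalidate caches before this IB */
	ib->flags    = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;
}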

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200519202505.4126-1-alexander.deucher@amd.com
commit bfbe1744e4 (Dave Airlie, 2020-05-20 13:28:04 +10:00)
32 changed files with 774 additions and 312 deletions

View File

@ -989,6 +989,8 @@ struct amdgpu_device {
char product_number[16];
char product_name[32];
char serial[16];
struct amdgpu_autodump autodump;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)

View File

@ -27,7 +27,7 @@
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
#include <linux/poll.h>
#include <drm/drm_debugfs.h>
#include "amdgpu.h"
@ -74,7 +74,81 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
return 0;
}
int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
unsigned long timeout = 600 * HZ;
int ret;
wake_up_interruptible(&adev->autodump.gpu_hang);
ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout);
if (ret == 0) {
pr_err("autodump: timeout, move on to gpu recovery\n");
return -ETIMEDOUT;
}
#endif
return 0;
}
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
{
struct amdgpu_device *adev = inode->i_private;
int ret;
file->private_data = adev;
mutex_lock(&adev->lock_reset);
if (adev->autodump.dumping.done) {
reinit_completion(&adev->autodump.dumping);
ret = 0;
} else {
ret = -EBUSY;
}
mutex_unlock(&adev->lock_reset);
return ret;
}
static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file)
{
struct amdgpu_device *adev = file->private_data;
complete_all(&adev->autodump.dumping);
return 0;
}
static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table)
{
struct amdgpu_device *adev = file->private_data;
poll_wait(file, &adev->autodump.gpu_hang, poll_table);
if (adev->in_gpu_reset)
return POLLIN | POLLRDNORM | POLLWRNORM;
return 0;
}
static const struct file_operations autodump_debug_fops = {
.owner = THIS_MODULE,
.open = amdgpu_debugfs_autodump_open,
.poll = amdgpu_debugfs_autodump_poll,
.release = amdgpu_debugfs_autodump_release,
};
static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
{
init_completion(&adev->autodump.dumping);
complete_all(&adev->autodump.dumping);
init_waitqueue_head(&adev->autodump.gpu_hang);
debugfs_create_file("amdgpu_autodump", 0600,
adev->ddev->primary->debugfs_root,
adev, &autodump_debug_fops);
}
/**
* amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
@ -1434,6 +1508,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_ras_debugfs_create_all(adev);
amdgpu_debugfs_autodump_init(adev);
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
ARRAY_SIZE(amdgpu_debugfs_list));
}
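Taken together, the fops above imply a simple userspace protocol: hold the node open, poll() for a hang, dump whatever state is needed, then close the node so GPU recovery can proceed. A sketch, assuming one GPU at DRM minor 0 and debugfs mounted in the usual place:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* open() re-arms the 'dumping' completion; a second opener gets -EBUSY */
	int fd = open("/sys/kernel/debug/dri/0/amdgpu_autodump", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* poll() sleeps on the gpu_hang waitqueue; readable means a reset is pending */
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
		fprintf(stderr, "GPU hang detected, dumping state...\n");
		/* ... read registers, ring contents, etc. here ... */
	}

	/* close() completes 'dumping', unblocking amdgpu_debugfs_wait_dump()
	 * (which otherwise times out after 600 seconds) */
	close(fd);
	return 0;
}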

View File

@ -31,6 +31,11 @@ struct amdgpu_debugfs {
unsigned num_files;
};
struct amdgpu_autodump {
struct completion dumping;
struct wait_queue_head gpu_hang;
};
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
int amdgpu_debugfs_init(struct amdgpu_device *adev);
void amdgpu_debugfs_fini(struct amdgpu_device *adev);
@ -40,3 +45,4 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
int amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
int amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);

View File

@ -3927,6 +3927,8 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
int i, r = 0;
bool need_full_reset = *need_full_reset_arg;
amdgpu_debugfs_wait_dump(adev);
/* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];

View File

@ -1188,3 +1188,13 @@ int amdgpu_dpm_set_df_cstate(struct amdgpu_device *adev,
return ret;
}
int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en)
{
struct smu_context *smu = &adev->smu;
if (is_support_sw_smu(adev))
return smu_allow_xgmi_power_down(smu, en);
return 0;
}

View File

@ -538,4 +538,6 @@ int amdgpu_dpm_baco_enter(struct amdgpu_device *adev);
int amdgpu_dpm_set_df_cstate(struct amdgpu_device *adev,
uint32_t cstate);
int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en);
#endif

View File

@ -86,9 +86,10 @@
* - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask
* - 3.36.0 - Allow reading more status registers on si/cik
* - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
* - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 37
#define KMS_DRIVER_MINOR 38
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;

View File

@ -189,6 +189,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
dma_fence_put(tmp);
}
if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync)
ring->funcs->emit_mem_sync(ring);
if (ring->funcs->insert_start)
ring->funcs->insert_start(ring);

View File

@ -154,9 +154,9 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso
*
*/
static ssize_t amdgpu_get_dpm_state(struct device *dev,
struct device_attribute *attr,
char *buf)
static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
@ -189,10 +189,10 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
(pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance");
}
static ssize_t amdgpu_set_dpm_state(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
@ -294,9 +294,9 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
*
*/
static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
struct device_attribute *attr,
char *buf)
static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
@ -332,10 +332,10 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
"unknown");
}
static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
@ -873,10 +873,10 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
* the corresponding bit from original ppfeature masks and input the
* new ppfeature masks.
*/
static ssize_t amdgpu_set_pp_feature_status(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
static ssize_t amdgpu_set_pp_features(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
@ -917,9 +917,9 @@ static ssize_t amdgpu_set_pp_feature_status(struct device *dev,
return count;
}
static ssize_t amdgpu_get_pp_feature_status(struct device *dev,
struct device_attribute *attr,
char *buf)
static ssize_t amdgpu_get_pp_features(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
@ -1663,9 +1663,9 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
* The SMU firmware computes a percentage of load based on the
* aggregate activity level in the IP cores.
*/
static ssize_t amdgpu_get_busy_percent(struct device *dev,
struct device_attribute *attr,
char *buf)
static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
@ -1699,9 +1699,9 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
* The SMU firmware computes a percentage of load based on the
* aggregate activity level in the IP cores.
*/
static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
struct device_attribute *attr,
char *buf)
static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
@ -1790,57 +1790,174 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
return 0;
}
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
amdgpu_set_dpm_forced_performance_level);
static DEVICE_ATTR(pp_num_states, S_IRUGO, amdgpu_get_pp_num_states, NULL);
static DEVICE_ATTR(pp_cur_state, S_IRUGO, amdgpu_get_pp_cur_state, NULL);
static DEVICE_ATTR(pp_force_state, S_IRUGO | S_IWUSR,
amdgpu_get_pp_force_state,
amdgpu_set_pp_force_state);
static DEVICE_ATTR(pp_table, S_IRUGO | S_IWUSR,
amdgpu_get_pp_table,
amdgpu_set_pp_table);
static DEVICE_ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_sclk,
amdgpu_set_pp_dpm_sclk);
static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_mclk,
amdgpu_set_pp_dpm_mclk);
static DEVICE_ATTR(pp_dpm_socclk, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_socclk,
amdgpu_set_pp_dpm_socclk);
static DEVICE_ATTR(pp_dpm_fclk, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_fclk,
amdgpu_set_pp_dpm_fclk);
static DEVICE_ATTR(pp_dpm_dcefclk, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_dcefclk,
amdgpu_set_pp_dpm_dcefclk);
static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_pcie,
amdgpu_set_pp_dpm_pcie);
static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
amdgpu_get_pp_sclk_od,
amdgpu_set_pp_sclk_od);
static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
amdgpu_get_pp_mclk_od,
amdgpu_set_pp_mclk_od);
static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR,
amdgpu_get_pp_power_profile_mode,
amdgpu_set_pp_power_profile_mode);
static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
amdgpu_get_pp_od_clk_voltage,
amdgpu_set_pp_od_clk_voltage);
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
amdgpu_get_busy_percent, NULL);
static DEVICE_ATTR(mem_busy_percent, S_IRUGO,
amdgpu_get_memory_busy_percent, NULL);
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
static DEVICE_ATTR(pp_features, S_IRUGO | S_IWUSR,
amdgpu_get_pp_feature_status,
amdgpu_set_pp_feature_status);
static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);
static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RO(pp_num_states, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(pp_cur_state, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_force_state, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_table, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_dpm_sclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(pp_dpm_mclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(pp_dpm_socclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(pp_dpm_fclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(pp_dpm_dcefclk, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_dpm_pcie, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_sclk_od, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_mclk_od, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_power_profile_mode, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_od_clk_voltage, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(gpu_busy_percent, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(mem_busy_percent, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(pcie_bw, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC),
};
static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
uint32_t mask)
{
struct device_attribute *dev_attr = &attr->dev_attr;
const char *attr_name = dev_attr->attr.name;
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
enum amd_asic_type asic_type = adev->asic_type;
if (!(attr->flags & mask)) {
attr->states = ATTR_STATE_UNSUPPORTED;
return 0;
}
#define DEVICE_ATTR_IS(_name) (!strcmp(attr_name, #_name))
if (DEVICE_ATTR_IS(pp_dpm_socclk)) {
if (asic_type <= CHIP_VEGA10)
attr->states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) {
if (asic_type <= CHIP_VEGA10 || asic_type == CHIP_ARCTURUS)
attr->states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(pp_dpm_fclk)) {
if (asic_type < CHIP_VEGA20)
attr->states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(pp_dpm_pcie)) {
if (asic_type == CHIP_ARCTURUS)
attr->states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(pp_od_clk_voltage)) {
attr->states = ATTR_STATE_UNSUPPORTED;
if ((is_support_sw_smu(adev) && adev->smu.od_enabled) ||
(!is_support_sw_smu(adev) && hwmgr->od_enabled))
attr->states = ATTR_STATE_SUPPORTED;
} else if (DEVICE_ATTR_IS(mem_busy_percent)) {
if (adev->flags & AMD_IS_APU || asic_type == CHIP_VEGA10)
attr->states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(pcie_bw)) {
/* PCIe Perf counters won't work on APU nodes */
if (adev->flags & AMD_IS_APU)
attr->states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(unique_id)) {
if (!adev->unique_id)
attr->states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(pp_features)) {
if (adev->flags & AMD_IS_APU || asic_type <= CHIP_VEGA10)
attr->states = ATTR_STATE_UNSUPPORTED;
}
if (asic_type == CHIP_ARCTURUS) {
/* Arcturus does not support standalone mclk/socclk/fclk level setting */
if (DEVICE_ATTR_IS(pp_dpm_mclk) ||
DEVICE_ATTR_IS(pp_dpm_socclk) ||
DEVICE_ATTR_IS(pp_dpm_fclk)) {
dev_attr->attr.mode &= ~S_IWUGO;
dev_attr->store = NULL;
}
}
#undef DEVICE_ATTR_IS
return 0;
}
static int amdgpu_device_attr_create(struct amdgpu_device *adev,
struct amdgpu_device_attr *attr,
uint32_t mask)
{
int ret = 0;
struct device_attribute *dev_attr = &attr->dev_attr;
const char *name = dev_attr->attr.name;
int (*attr_update)(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
uint32_t mask) = default_attr_update;
BUG_ON(!attr);
attr_update = attr->attr_update ? attr->attr_update : default_attr_update;
ret = attr_update(adev, attr, mask);
if (ret) {
dev_err(adev->dev, "failed to update device file %s, ret = %d\n",
name, ret);
return ret;
}
/* attr->states may have been changed by the attr_update callback above */
if (attr->states == ATTR_STATE_UNSUPPORTED)
return 0;
ret = device_create_file(adev->dev, dev_attr);
if (ret) {
dev_err(adev->dev, "failed to create device file %s, ret = %d\n",
name, ret);
}
attr->states = ATTR_STATE_SUPPORTED;
return ret;
}
static void amdgpu_device_attr_remove(struct amdgpu_device *adev, struct amdgpu_device_attr *attr)
{
struct device_attribute *dev_attr = &attr->dev_attr;
if (attr->states == ATTR_STATE_UNSUPPORTED)
return;
device_remove_file(adev->dev, dev_attr);
attr->states = ATTR_STATE_UNSUPPORTED;
}
static int amdgpu_device_attr_create_groups(struct amdgpu_device *adev,
struct amdgpu_device_attr *attrs,
uint32_t counts,
uint32_t mask)
{
int ret = 0;
uint32_t i = 0;
for (i = 0; i < counts; i++) {
ret = amdgpu_device_attr_create(adev, &attrs[i], mask);
if (ret)
goto failed;
}
return 0;
failed:
/* unwind in reverse, removing only the attrs actually created */
while (i--)
amdgpu_device_attr_remove(adev, &attrs[i]);
return ret;
}
static void amdgpu_device_attr_remove_groups(struct amdgpu_device *adev,
struct amdgpu_device_attr *attrs,
uint32_t counts)
{
uint32_t i = 0;
for (i = 0; i < counts; i++)
amdgpu_device_attr_remove(adev, &attrs[i]);
}
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@ -3241,8 +3358,8 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio
int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
{
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
int ret;
uint32_t mask = 0;
if (adev->pm.sysfs_initialized)
return 0;
@ -3260,168 +3377,25 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_power_dpm_state);
if (ret) {
DRM_ERROR("failed to create device file for dpm state\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
if (ret) {
DRM_ERROR("failed to create device file for dpm state\n");
return ret;
switch (amdgpu_virt_get_sriov_vf_mode(adev)) {
case SRIOV_VF_MODE_ONE_VF:
mask = ATTR_FLAG_ONEVF;
break;
case SRIOV_VF_MODE_MULTI_VF:
mask = 0;
break;
case SRIOV_VF_MODE_BARE_METAL:
default:
mask = ATTR_FLAG_MASK_ALL;
break;
}
if (!amdgpu_sriov_vf(adev)) {
ret = device_create_file(adev->dev, &dev_attr_pp_num_states);
if (ret) {
DRM_ERROR("failed to create device file pp_num_states\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_cur_state);
if (ret) {
DRM_ERROR("failed to create device file pp_cur_state\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_force_state);
if (ret) {
DRM_ERROR("failed to create device file pp_force_state\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_table);
if (ret) {
DRM_ERROR("failed to create device file pp_table\n");
return ret;
}
}
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_sclk\n");
ret = amdgpu_device_attr_create_groups(adev,
amdgpu_device_attrs,
ARRAY_SIZE(amdgpu_device_attrs),
mask);
if (ret)
return ret;
}
/* Arcturus does not support standalone mclk/socclk/fclk level setting */
if (adev->asic_type == CHIP_ARCTURUS) {
dev_attr_pp_dpm_mclk.attr.mode &= ~S_IWUGO;
dev_attr_pp_dpm_mclk.store = NULL;
dev_attr_pp_dpm_socclk.attr.mode &= ~S_IWUGO;
dev_attr_pp_dpm_socclk.store = NULL;
dev_attr_pp_dpm_fclk.attr.mode &= ~S_IWUGO;
dev_attr_pp_dpm_fclk.store = NULL;
}
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_mclk\n");
return ret;
}
if (adev->asic_type >= CHIP_VEGA10) {
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_socclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_socclk\n");
return ret;
}
if (adev->asic_type != CHIP_ARCTURUS) {
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_dcefclk\n");
return ret;
}
}
}
if (adev->asic_type >= CHIP_VEGA20) {
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_fclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_fclk\n");
return ret;
}
}
/* the rest are not needed for SRIOV one vf mode */
if (amdgpu_sriov_vf(adev)) {
adev->pm.sysfs_initialized = true;
return ret;
}
if (adev->asic_type != CHIP_ARCTURUS) {
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_pcie\n");
return ret;
}
}
ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od);
if (ret) {
DRM_ERROR("failed to create device file pp_sclk_od\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_mclk_od);
if (ret) {
DRM_ERROR("failed to create device file pp_mclk_od\n");
return ret;
}
ret = device_create_file(adev->dev,
&dev_attr_pp_power_profile_mode);
if (ret) {
DRM_ERROR("failed to create device file "
"pp_power_profile_mode\n");
return ret;
}
if ((is_support_sw_smu(adev) && adev->smu.od_enabled) ||
(!is_support_sw_smu(adev) && hwmgr->od_enabled)) {
ret = device_create_file(adev->dev,
&dev_attr_pp_od_clk_voltage);
if (ret) {
DRM_ERROR("failed to create device file "
"pp_od_clk_voltage\n");
return ret;
}
}
ret = device_create_file(adev->dev,
&dev_attr_gpu_busy_percent);
if (ret) {
DRM_ERROR("failed to create device file "
"gpu_busy_level\n");
return ret;
}
/* APU does not have its own dedicated memory */
if (!(adev->flags & AMD_IS_APU) &&
(adev->asic_type != CHIP_VEGA10)) {
ret = device_create_file(adev->dev,
&dev_attr_mem_busy_percent);
if (ret) {
DRM_ERROR("failed to create device file "
"mem_busy_percent\n");
return ret;
}
}
/* PCIe Perf counters won't work on APU nodes */
if (!(adev->flags & AMD_IS_APU)) {
ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
if (ret) {
DRM_ERROR("failed to create device file pcie_bw\n");
return ret;
}
}
if (adev->unique_id)
ret = device_create_file(adev->dev, &dev_attr_unique_id);
if (ret) {
DRM_ERROR("failed to create device file unique_id\n");
return ret;
}
if ((adev->asic_type >= CHIP_VEGA10) &&
!(adev->flags & AMD_IS_APU)) {
ret = device_create_file(adev->dev,
&dev_attr_pp_features);
if (ret) {
DRM_ERROR("failed to create device file "
"pp_features\n");
return ret;
}
}
adev->pm.sysfs_initialized = true;
@ -3430,51 +3404,15 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
{
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
if (adev->pm.dpm_enabled == 0)
return;
if (adev->pm.int_hwmon_dev)
hwmon_device_unregister(adev->pm.int_hwmon_dev);
device_remove_file(adev->dev, &dev_attr_power_dpm_state);
device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
device_remove_file(adev->dev, &dev_attr_pp_num_states);
device_remove_file(adev->dev, &dev_attr_pp_cur_state);
device_remove_file(adev->dev, &dev_attr_pp_force_state);
device_remove_file(adev->dev, &dev_attr_pp_table);
device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
if (adev->asic_type >= CHIP_VEGA10) {
device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk);
if (adev->asic_type != CHIP_ARCTURUS)
device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk);
}
if (adev->asic_type != CHIP_ARCTURUS)
device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
if (adev->asic_type >= CHIP_VEGA20)
device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk);
device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
device_remove_file(adev->dev,
&dev_attr_pp_power_profile_mode);
if ((is_support_sw_smu(adev) && adev->smu.od_enabled) ||
(!is_support_sw_smu(adev) && hwmgr->od_enabled))
device_remove_file(adev->dev,
&dev_attr_pp_od_clk_voltage);
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
if (!(adev->flags & AMD_IS_APU) &&
(adev->asic_type != CHIP_VEGA10))
device_remove_file(adev->dev, &dev_attr_mem_busy_percent);
if (!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_pcie_bw);
if (adev->unique_id)
device_remove_file(adev->dev, &dev_attr_unique_id);
if ((adev->asic_type >= CHIP_VEGA10) &&
!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_pp_features);
amdgpu_device_attr_remove_groups(adev,
amdgpu_device_attrs,
ARRAY_SIZE(amdgpu_device_attrs));
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)

View File

@ -30,6 +30,52 @@ struct cg_flag_name
const char *name;
};
enum amdgpu_device_attr_flags {
ATTR_FLAG_BASIC = (1 << 0),
ATTR_FLAG_ONEVF = (1 << 16),
};
#define ATTR_FLAG_TYPE_MASK (0x0000ffff)
#define ATTR_FLAG_MODE_MASK (0xffff0000)
#define ATTR_FLAG_MASK_ALL (0xffffffff)
enum amdgpu_device_attr_states {
ATTR_STATE_UNSUPPORTED = 0,
ATTR_STATE_SUPPORTED,
};
struct amdgpu_device_attr {
struct device_attribute dev_attr;
enum amdgpu_device_attr_flags flags;
enum amdgpu_device_attr_states states;
int (*attr_update)(struct amdgpu_device *adev,
struct amdgpu_device_attr* attr,
uint32_t mask);
};
#define to_amdgpu_device_attr(_dev_attr) \
container_of(_dev_attr, struct amdgpu_device_attr, dev_attr)
#define __AMDGPU_DEVICE_ATTR(_name, _mode, _show, _store, _flags, ...) \
{ .dev_attr = __ATTR(_name, _mode, _show, _store), \
.flags = _flags, \
.states = ATTR_STATE_SUPPORTED, \
##__VA_ARGS__, }
#define AMDGPU_DEVICE_ATTR(_name, _mode, _flags, ...) \
__AMDGPU_DEVICE_ATTR(_name, _mode, \
amdgpu_get_##_name, amdgpu_set_##_name, \
_flags, ##__VA_ARGS__)
#define AMDGPU_DEVICE_ATTR_RW(_name, _flags, ...) \
AMDGPU_DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
_flags, ##__VA_ARGS__)
#define AMDGPU_DEVICE_ATTR_RO(_name, _flags, ...) \
__AMDGPU_DEVICE_ATTR(_name, S_IRUGO, \
amdgpu_get_##_name, NULL, \
_flags, ##__VA_ARGS__)
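For reference, a hand-expanded sketch of one table entry; this is roughly what AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF) becomes, and the ##_name token pasting is why the accessors were renamed earlier to match their sysfs file names:

/* rough manual expansion, assuming the renamed accessors */
{
	.dev_attr = __ATTR(power_dpm_state, S_IRUGO | S_IWUSR,
			   amdgpu_get_power_dpm_state,
			   amdgpu_set_power_dpm_state),
	.flags	= ATTR_FLAG_BASIC | ATTR_FLAG_ONEVF,
	.states	= ATTR_STATE_SUPPORTED,
},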
void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev);
int amdgpu_pm_sysfs_init(struct amdgpu_device *adev);
int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev);

View File

@ -811,6 +811,32 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
return 0;
}
/* Trigger XGMI/WAFL error */
int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
struct ta_ras_trigger_error_input *block_info)
{
int ret;
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
dev_warn(adev->dev, "Failed to disallow df cstate");
if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
dev_warn(adev->dev, "Failed to disallow XGMI power down");
ret = psp_ras_trigger_error(&adev->psp, block_info);
/* if the injection raised a fatal RAS interrupt, skip the restore below;
* full GPU recovery will presumably follow */
if (amdgpu_ras_intr_triggered())
return ret;
if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
dev_warn(adev->dev, "Failed to allow XGMI power down");
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
dev_warn(adev->dev, "Failed to allow df cstate");
return ret;
}
/* wrapper of psp_ras_trigger_error */
int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info)
@ -844,10 +870,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
break;
case AMDGPU_RAS_BLOCK__UMC:
case AMDGPU_RAS_BLOCK__MMHUB:
case AMDGPU_RAS_BLOCK__XGMI_WAFL:
case AMDGPU_RAS_BLOCK__PCIE_BIF:
ret = psp_ras_trigger_error(&adev->psp, &block_info);
break;
case AMDGPU_RAS_BLOCK__XGMI_WAFL:
ret = amdgpu_ras_error_inject_xgmi(adev, &block_info);
break;
default:
dev_info(adev->dev, "%s error injection is not supported yet\n",
ras_block_str(info->head.block));

View File

@ -195,6 +195,7 @@ struct amdgpu_ring_funcs {
/* Try to soft recover the ring to make the fence signal */
void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
int (*preempt_ib)(struct amdgpu_ring *ring);
void (*emit_mem_sync)(struct amdgpu_ring *ring);
};
struct amdgpu_ring {

View File

@ -370,3 +370,19 @@ void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
}
enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev)
{
enum amdgpu_sriov_vf_mode mode;
if (amdgpu_sriov_vf(adev)) {
if (amdgpu_sriov_is_pp_one_vf(adev))
mode = SRIOV_VF_MODE_ONE_VF;
else
mode = SRIOV_VF_MODE_MULTI_VF;
} else {
mode = SRIOV_VF_MODE_BARE_METAL;
}
return mode;
}

View File

@ -35,6 +35,12 @@
/* tonga/fiji use this offset */
#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503
enum amdgpu_sriov_vf_mode {
SRIOV_VF_MODE_BARE_METAL = 0,
SRIOV_VF_MODE_ONE_VF,
SRIOV_VF_MODE_MULTI_VF,
};
struct amdgpu_mm_table {
struct amdgpu_bo *bo;
uint32_t *cpu_addr;
@ -323,4 +329,6 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev);
bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev);
#endif

View File

@ -441,7 +441,7 @@ out:
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
{
int ret = -EINVAL;
int ret;
/* Each psp need to set the latest topology */
ret = psp_xgmi_set_topology_info(&adev->psp,

View File

@ -450,7 +450,7 @@
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
#define PACKET3_AQUIRE_MEM 0x58
#define PACKET3_ACQUIRE_MEM 0x58
#define PACKET3_REWIND 0x59
#define PACKET3_LOAD_UCONFIG_REG 0x5E
#define PACKET3_LOAD_SH_REG 0x5F

View File

@ -4577,13 +4577,11 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
/* amdgpu_mm_wreg_mmio_rlc will fall back to mmio if rlcg_write is not supported */
amdgpu_mm_wreg_mmio_rlc(adev, SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
adev->gfx.rlc.clear_state_gpu_addr >> 32, 0);
amdgpu_mm_wreg_mmio_rlc(adev, SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc, 0);
amdgpu_mm_wreg_mmio_rlc(adev, SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
adev->gfx.rlc.clear_state_size, 0);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO,
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
return 0;
}
@ -5192,7 +5190,7 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
amdgpu_mm_wreg_mmio_rlc(adev, SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL), tmp, 0);
WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
for (i = 0; i < adev->usec_timeout; i++) {
if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
@ -8022,6 +8020,29 @@ static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
return 0;
}
static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
{
const unsigned int gcr_cntl =
PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}
static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.name = "gfx_v10_0",
.early_init = gfx_v10_0_early_init,
@ -8069,7 +8090,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2, /* SWITCH_BUFFER */
2 + /* SWITCH_BUFFER */
8, /* gfx_v10_0_emit_mem_sync */
.emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
.emit_ib = gfx_v10_0_ring_emit_ib_gfx,
.emit_fence = gfx_v10_0_ring_emit_fence,
@ -8091,6 +8113,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
.soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@ -8110,7 +8133,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
8, /* gfx_v10_0_emit_mem_sync */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
.emit_fence = gfx_v10_0_ring_emit_fence,
@ -8125,6 +8149,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {

View File

@ -3465,6 +3465,18 @@ static int gfx_v6_0_set_powergating_state(void *handle,
return 0;
}
static void gfx_v6_0_emit_mem_sync(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
PACKET3_TC_ACTION_ENA |
PACKET3_SH_KCACHE_ACTION_ENA |
PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
}
static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
.name = "gfx_v6_0",
.early_init = gfx_v6_0_early_init,
@ -3495,7 +3507,8 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */
3 + 2 + /* gfx_v6_ring_emit_cntxcntl including vgt flush */
5, /* SURFACE_SYNC */
.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
.emit_ib = gfx_v6_0_ring_emit_ib,
.emit_fence = gfx_v6_0_ring_emit_fence,
@ -3506,6 +3519,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
.insert_nop = amdgpu_ring_insert_nop,
.emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
.emit_wreg = gfx_v6_0_ring_emit_wreg,
.emit_mem_sync = gfx_v6_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
@ -3519,7 +3533,8 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
5 + 5 + /* hdp flush / invalidate */
7 + /* gfx_v6_0_ring_emit_pipeline_sync */
SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */
14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
5, /* SURFACE_SYNC */
.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
.emit_ib = gfx_v6_0_ring_emit_ib,
.emit_fence = gfx_v6_0_ring_emit_fence,
@ -3529,6 +3544,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
.test_ib = gfx_v6_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.emit_wreg = gfx_v6_0_ring_emit_wreg,
.emit_mem_sync = gfx_v6_0_emit_mem_sync,
};
static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev)

View File

@ -4998,6 +4998,32 @@ static int gfx_v7_0_set_powergating_state(void *handle,
return 0;
}
static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
PACKET3_TC_ACTION_ENA |
PACKET3_SH_KCACHE_ACTION_ENA |
PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
}
static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
PACKET3_TC_ACTION_ENA |
PACKET3_SH_KCACHE_ACTION_ENA |
PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
}
static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
.name = "gfx_v7_0",
.early_init = gfx_v7_0_early_init,
@ -5030,7 +5056,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
5, /* SURFACE_SYNC */
.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
@ -5045,6 +5072,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
.emit_wreg = gfx_v7_0_ring_emit_wreg,
.soft_recovery = gfx_v7_0_ring_soft_recovery,
.emit_mem_sync = gfx_v7_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@ -5061,7 +5089,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
5 + /* hdp invalidate */
7 + /* gfx_v7_0_ring_emit_pipeline_sync */
CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
7, /* gfx_v7_0_emit_mem_sync_compute */
.emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
.emit_ib = gfx_v7_0_ring_emit_ib_compute,
.emit_fence = gfx_v7_0_ring_emit_fence_compute,
@ -5074,6 +5103,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v7_0_ring_emit_wreg,
.emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
};
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)

View File

@ -6817,6 +6817,34 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
return 0;
}
static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
PACKET3_TC_ACTION_ENA |
PACKET3_SH_KCACHE_ACTION_ENA |
PACKET3_SH_ICACHE_ACTION_ENA |
PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
}
static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
PACKET3_TC_ACTION_ENA |
PACKET3_SH_KCACHE_ACTION_ENA |
PACKET3_SH_ICACHE_ACTION_ENA |
PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
}
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
.name = "gfx_v8_0",
.early_init = gfx_v8_0_early_init,
@ -6863,7 +6891,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
12 + 12 + /* FENCE x2 */
2, /* SWITCH_BUFFER */
2 + /* SWITCH_BUFFER */
5, /* SURFACE_SYNC */
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
@ -6881,6 +6910,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
.soft_recovery = gfx_v8_0_ring_soft_recovery,
.emit_mem_sync = gfx_v8_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@ -6897,7 +6927,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5 + /* hdp_invalidate */
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7, /* gfx_v8_0_emit_mem_sync_compute */
.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
.emit_fence = gfx_v8_0_ring_emit_fence_compute,
@ -6910,6 +6941,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {

View File

@ -6634,6 +6634,25 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
return 0;
}
static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
{
const unsigned int cp_coher_cntl =
PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
}
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.name = "gfx_v9_0",
.early_init = gfx_v9_0_early_init,
@ -6680,7 +6699,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2, /* SWITCH_BUFFER */
2 + /* SWITCH_BUFFER */
7, /* gfx_v9_0_emit_mem_sync */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
.emit_fence = gfx_v9_0_ring_emit_fence,
@ -6701,6 +6721,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
.soft_recovery = gfx_v9_0_ring_soft_recovery,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@ -6720,7 +6741,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7, /* gfx_v9_0_emit_mem_sync */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
.emit_fence = gfx_v9_0_ring_emit_fence,
@ -6735,6 +6757,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {

View File

@ -256,6 +256,54 @@
#define PACKET3_BLK_CNTX_UPDATE 0x53
#define PACKET3_INCR_UPDT_STATE 0x55
#define PACKET3_ACQUIRE_MEM 0x58
/* 1. HEADER
* 2. COHER_CNTL [30:0]
* 2.1 ENGINE_SEL [31:31]
* 3. COHER_SIZE [31:0]
* 4. COHER_SIZE_HI [7:0]
* 5. COHER_BASE_LO [31:0]
* 6. COHER_BASE_HI [23:0]
* 7. POLL_INTERVAL [15:0]
* 8. GCR_CNTL [18:0]
*/
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) ((x) << 0)
/*
* 0:NOP
* 1:ALL
* 2:RANGE
* 3:FIRST_LAST
*/
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_RANGE(x) ((x) << 2)
/*
* 0:ALL
* 1:reserved
* 2:RANGE
* 3:FIRST_LAST
*/
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(x) ((x) << 4)
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(x) ((x) << 5)
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_WB(x) ((x) << 6)
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(x) ((x) << 7)
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(x) ((x) << 8)
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(x) ((x) << 9)
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_US(x) ((x) << 10)
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_RANGE(x) ((x) << 11)
/*
* 0:ALL
* 1:VOL
* 2:RANGE
* 3:FIRST_LAST
*/
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_DISCARD(x) ((x) << 13)
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(x) ((x) << 14)
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(x) ((x) << 15)
#define PACKET3_ACQUIRE_MEM_GCR_CNTL_SEQ(x) ((x) << 16)
/*
* 0: PARALLEL
* 1: FORWARD
* 2: REVERSE
*/
#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
#define PACKET3_REWIND 0x59
#define PACKET3_INTERRUPT 0x5A
#define PACKET3_GEN_PDEPTE 0x5B

View File

@ -253,7 +253,30 @@
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
#define PACKET3_AQUIRE_MEM 0x58
#define PACKET3_ACQUIRE_MEM 0x58
/* 1. HEADER
* 2. COHER_CNTL [30:0]
* 2.1 ENGINE_SEL [31:31]
* 3. COHER_SIZE [31:0]
* 4. COHER_SIZE_HI [7:0]
* 5. COHER_BASE_LO [31:0]
* 6. COHER_BASE_HI [23:0]
* 7. POLL_INTERVAL [15:0]
*/
/* COHER_CNTL fields for CP_COHER_CNTL */
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_NC_ACTION_ENA(x) ((x) << 3)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WC_ACTION_ENA(x) ((x) << 4)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_INV_METADATA_ACTION_ENA(x) ((x) << 5)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_VOL_ACTION_ENA(x) ((x) << 15)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(x) ((x) << 18)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(x) ((x) << 22)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(x) ((x) << 23)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_CB_ACTION_ENA(x) ((x) << 25)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_DB_ACTION_ENA(x) ((x) << 26)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(x) ((x) << 27)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_VOL_ACTION_ENA(x) ((x) << 28)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30)
#define PACKET3_REWIND 0x59
#define PACKET3_LOAD_UCONFIG_REG 0x5E
#define PACKET3_LOAD_SH_REG 0x5F

View File

@ -332,7 +332,7 @@
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
#define PACKET3_AQUIRE_MEM 0x58
#define PACKET3_ACQUIRE_MEM 0x58
#define PACKET3_REWIND 0x59
#define PACKET3_LOAD_UCONFIG_REG 0x5E
#define PACKET3_LOAD_SH_REG 0x5F

View File

@ -2100,6 +2100,28 @@ int smu_set_df_cstate(struct smu_context *smu,
return ret;
}
int smu_allow_xgmi_power_down(struct smu_context *smu, bool en)
{
struct amdgpu_device *adev = smu->adev;
int ret = 0;
if (!adev->pm.dpm_enabled)
return -EINVAL;
if (!smu->ppt_funcs || !smu->ppt_funcs->allow_xgmi_power_down)
return 0;
mutex_lock(&smu->mutex);
ret = smu->ppt_funcs->allow_xgmi_power_down(smu, en);
if (ret)
pr_err("[AllowXgmiPowerDown] failed!\n");
mutex_unlock(&smu->mutex);
return ret;
}
int smu_write_watermarks_table(struct smu_context *smu)
{
void *watermarks_table = smu->smu_table.watermarks_table;

View File

@ -128,6 +128,7 @@ static struct smu_11_0_cmn2aisc_mapping arcturus_message_map[SMU_MSG_MAX_COUNT]
MSG_MAP(SetXgmiMode, PPSMC_MSG_SetXgmiMode),
MSG_MAP(SetMemoryChannelEnable, PPSMC_MSG_SetMemoryChannelEnable),
MSG_MAP(DFCstateControl, PPSMC_MSG_DFCstateControl),
MSG_MAP(GmiPwrDnControl, PPSMC_MSG_GmiPwrDnControl),
};
static struct smu_11_0_cmn2aisc_mapping arcturus_clk_map[SMU_CLK_COUNT] = {
@ -2286,6 +2287,35 @@ static int arcturus_set_df_cstate(struct smu_context *smu,
return smu_send_smc_msg_with_param(smu, SMU_MSG_DFCstateControl, state, NULL);
}
static int arcturus_allow_xgmi_power_down(struct smu_context *smu, bool en)
{
uint32_t smu_version;
int ret;
ret = smu_get_smc_version(smu, NULL, &smu_version);
if (ret) {
pr_err("Failed to get smu version!\n");
return ret;
}
/* PPSMC_MSG_GmiPwrDnControl is supported by 54.23.0 and onwards */
if (smu_version < 0x00361700) {
pr_err("XGMI power down control is only supported by PMFW 54.23.0 and onwards\n");
return -EINVAL;
}
if (en)
return smu_send_smc_msg_with_param(smu,
SMU_MSG_GmiPwrDnControl,
1,
NULL);
return smu_send_smc_msg_with_param(smu,
SMU_MSG_GmiPwrDnControl,
0,
NULL);
}
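A note on the 0x00361700 cutoff above: assuming the usual SMU v11 packing of the PMFW version into a single word (major in bits 23:16, minor in 15:8, debug in 7:0), version 54.23.0 encodes as (0x36 << 16) | (0x17 << 8) | 0x00 = 0x00361700, so a plain integer comparison is sufficient:

/* Assumed SMU11 PMFW version packing: 0x00MMmmdd (major, minor, debug) */
#define PMFW_VERSION(major, minor, debug) \
	(((major) << 16) | ((minor) << 8) | (debug))

/* PMFW_VERSION(54, 23, 0) == 0x00361700, the cutoff checked above */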
static const struct pptable_funcs arcturus_ppt_funcs = {
/* translate smu index into arcturus specific index */
.get_smu_msg_index = arcturus_get_smu_msg_index,
@ -2379,6 +2409,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
.override_pcie_parameters = smu_v11_0_override_pcie_parameters,
.get_pptable_power_limit = arcturus_get_pptable_power_limit,
.set_df_cstate = arcturus_set_df_cstate,
.allow_xgmi_power_down = arcturus_allow_xgmi_power_down,
};
void arcturus_set_ppt_funcs(struct smu_context *smu)

View File

@ -22,6 +22,7 @@
*/
#include <linux/pci.h>
#include <linux/reboot.h>
#include "hwmgr.h"
#include "pp_debug.h"
@ -595,37 +596,61 @@ int phm_irq_process(struct amdgpu_device *adev,
uint32_t src_id = entry->src_id;
if (client_id == AMDGPU_IRQ_CLIENTID_LEGACY) {
if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_LOW_TO_HIGH)
if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_LOW_TO_HIGH) {
pr_warn("GPU over temperature range detected on PCIe %d:%d.%d!\n",
PCI_BUS_NUM(adev->pdev->devfn),
PCI_SLOT(adev->pdev->devfn),
PCI_FUNC(adev->pdev->devfn));
else if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_HIGH_TO_LOW)
/*
* SW CTF just occurred.
* Try to do a graceful shutdown to prevent further damage.
*/
dev_emerg(adev->dev, "System is going to shutdown due to SW CTF!\n");
orderly_poweroff(true);
} else if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_HIGH_TO_LOW)
pr_warn("GPU under temperature range detected on PCIe %d:%d.%d!\n",
PCI_BUS_NUM(adev->pdev->devfn),
PCI_SLOT(adev->pdev->devfn),
PCI_FUNC(adev->pdev->devfn));
else if (src_id == VISLANDS30_IV_SRCID_GPIO_19)
else if (src_id == VISLANDS30_IV_SRCID_GPIO_19) {
pr_warn("GPU Critical Temperature Fault detected on PCIe %d:%d.%d!\n",
PCI_BUS_NUM(adev->pdev->devfn),
PCI_SLOT(adev->pdev->devfn),
PCI_FUNC(adev->pdev->devfn));
/*
* HW CTF just occurred. Shutdown to prevent further damage.
*/
dev_emerg(adev->dev, "System is going to shutdown due to HW CTF!\n");
orderly_poweroff(true);
}
} else if (client_id == SOC15_IH_CLIENTID_THM) {
if (src_id == 0)
if (src_id == 0) {
pr_warn("GPU over temperature range detected on PCIe %d:%d.%d!\n",
PCI_BUS_NUM(adev->pdev->devfn),
PCI_SLOT(adev->pdev->devfn),
PCI_FUNC(adev->pdev->devfn));
else
/*
* SW CTF just occurred.
* Try to do a graceful shutdown to prevent further damage.
*/
dev_emerg(adev->dev, "System is going to shutdown due to SW CTF!\n");
orderly_poweroff(true);
} else
pr_warn("GPU under temperature range detected on PCIe %d:%d.%d!\n",
PCI_BUS_NUM(adev->pdev->devfn),
PCI_SLOT(adev->pdev->devfn),
PCI_FUNC(adev->pdev->devfn));
} else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO)
} else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO) {
pr_warn("GPU Critical Temperature Fault detected on PCIe %d:%d.%d!\n",
PCI_BUS_NUM(adev->pdev->devfn),
PCI_SLOT(adev->pdev->devfn),
PCI_FUNC(adev->pdev->devfn));
/*
* HW CTF just occurred. Shutdown to prevent further damage.
*/
dev_emerg(adev->dev, "System is going to shutdown due to HW CTF!\n");
orderly_poweroff(true);
}
return 0;
}

View File

@ -491,6 +491,7 @@ struct pptable_funcs {
int (*get_dpm_clk_limited)(struct smu_context *smu, enum smu_clk_type clk_type,
uint32_t dpm_level, uint32_t *freq);
int (*set_df_cstate)(struct smu_context *smu, enum pp_df_cstate state);
int (*allow_xgmi_power_down)(struct smu_context *smu, bool en);
int (*update_pcie_parameters)(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap);
int (*i2c_eeprom_init)(struct i2c_adapter *control);
void (*i2c_eeprom_fini)(struct i2c_adapter *control);
@ -731,6 +732,7 @@ int smu_set_mp1_state(struct smu_context *smu,
enum pp_mp1_state mp1_state);
int smu_set_df_cstate(struct smu_context *smu,
enum pp_df_cstate state);
int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
int smu_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
struct pp_smu_nv_clock_table *max_clocks);

View File

@ -114,7 +114,8 @@
#define PPSMC_MSG_SetNumBadHbmPagesRetired 0x3A
#define PPSMC_MSG_DFCstateControl 0x3B
#define PPSMC_Message_Count 0x3C
#define PPSMC_MSG_GmiPwrDnControl 0x3D
#define PPSMC_Message_Count 0x3E
typedef uint32_t PPSMC_Result;
typedef uint32_t PPSMC_Msg;

View File

@ -170,6 +170,7 @@
__SMU_DUMMY_MAP(SetSoftMinJpeg), \
__SMU_DUMMY_MAP(SetHardMinFclkByFreq), \
__SMU_DUMMY_MAP(DFCstateControl), \
__SMU_DUMMY_MAP(GmiPwrDnControl), \
__SMU_DUMMY_MAP(DAL_DISABLE_DUMMY_PSTATE_CHANGE), \
__SMU_DUMMY_MAP(DAL_ENABLE_DUMMY_PSTATE_CHANGE), \

View File

@ -23,6 +23,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/reboot.h>
#define SMU_11_0_PARTIAL_PPTABLE
@ -1547,12 +1548,19 @@ static int smu_v11_0_ack_ac_dc_interrupt(struct smu_context *smu)
#define THM_11_0__SRCID__THM_DIG_THERM_L2H 0 /* ASIC_TEMP > CG_THERMAL_INT.DIG_THERM_INTH */
#define THM_11_0__SRCID__THM_DIG_THERM_H2L 1 /* ASIC_TEMP < CG_THERMAL_INT.DIG_THERM_INTL */
#define SMUIO_11_0__SRCID__SMUIO_GPIO19 83
static int smu_v11_0_irq_process(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
uint32_t client_id = entry->client_id;
uint32_t src_id = entry->src_id;
/*
* ctxid is used to distinguish different
* events for SMCToHost interrupt.
*/
uint32_t ctxid = entry->src_data[0];
if (client_id == SOC15_IH_CLIENTID_THM) {
switch (src_id) {
@ -1561,6 +1569,12 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev,
PCI_BUS_NUM(adev->pdev->devfn),
PCI_SLOT(adev->pdev->devfn),
PCI_FUNC(adev->pdev->devfn));
/*
* SW CTF just occurred.
* Try to do a graceful shutdown to prevent further damage.
*/
dev_emerg(adev->dev, "System is going to shutdown due to SW CTF!\n");
orderly_poweroff(true);
break;
case THM_11_0__SRCID__THM_DIG_THERM_H2L:
pr_warn("GPU under temperature range detected on PCIe %d:%d.%d!\n",
@ -1575,11 +1589,30 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev,
PCI_SLOT(adev->pdev->devfn),
PCI_FUNC(adev->pdev->devfn));
break;
}
} else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO) {
pr_warn("GPU Critical Temperature Fault detected on PCIe %d:%d.%d!\n",
PCI_BUS_NUM(adev->pdev->devfn),
PCI_SLOT(adev->pdev->devfn),
PCI_FUNC(adev->pdev->devfn));
/*
* HW CTF just occurred. Shutdown to prevent further damage.
*/
dev_emerg(adev->dev, "System is going to shutdown due to HW CTF!\n");
orderly_poweroff(true);
} else if (client_id == SOC15_IH_CLIENTID_MP1) {
if (src_id == 0xfe)
smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
if (src_id == 0xfe) {
switch (ctxid) {
case 0x3:
dev_dbg(adev->dev, "Switched to AC mode!\n");
smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
break;
case 0x4:
dev_dbg(adev->dev, "Switched to DC mode!\n");
smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
break;
}
}
}
return 0;
@ -1619,6 +1652,13 @@ int smu_v11_0_register_irq_handler(struct smu_context *smu)
if (ret)
return ret;
/* Register CTF(GPIO_19) interrupt */
ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_ROM_SMUIO,
SMUIO_11_0__SRCID__SMUIO_GPIO19,
irq_src);
if (ret)
return ret;
ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_MP1,
0xfe,
irq_src);

View File

@ -602,6 +602,10 @@ union drm_amdgpu_cs {
*/
#define AMDGPU_IB_FLAGS_SECURE (1 << 5)
/* Tell KMD to flush and invalidate caches
*/
#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC (1 << 6)
struct drm_amdgpu_cs_chunk_ib {
__u32 _pad;
/** AMDGPU_IB_FLAG_* */