Merge branch 'drm-next-4.15' of git://people.freedesktop.org/~agd5f/linux into drm-next

Last set of features for 4.15. Highlights:
- Add a BO flag to allow buffers to opt out of implicit sync (see the
  userspace sketch after this list)
- Add a ctx priority setting interface
- Lots more powerplay cleanups
- Start to plumb through the VRAM-lost infrastructure for GPU reset
- TTM support for huge pages
- Misc cleanups and bug fixes
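A minimal userspace sketch of the new uapi in this pull (the explicit-sync BO flag, context priority at allocation, the DRM-master priority-override ioctl, and the VRAM-lost counter query), assuming libdrm's drmIoctl() and the amdgpu_drm.h definitions referenced throughout the diff below; the function name and buffer sizes are illustrative and error handling is omitted:

/*
 * Illustrative only: exercises the new uapi from this pull (explicit-sync
 * BOs, context priorities, VRAM-lost counter).  The function name and the
 * buffer size/alignment below are made up for the example.
 */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

static void example_new_uapi(int fd)
{
	union drm_amdgpu_gem_create gem;
	union drm_amdgpu_ctx ctx;
	union drm_amdgpu_sched sched;
	struct drm_amdgpu_info info;
	uint32_t vram_lost = 0;

	/* Allocate a 1 MiB VRAM BO that opts out of implicit sync. */
	memset(&gem, 0, sizeof(gem));
	gem.in.bo_size = 1 << 20;
	gem.in.alignment = 4096;
	gem.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
	gem.in.domain_flags = AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
	drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &gem);

	/* Allocate a context at high priority; priorities above NORMAL need
	 * CAP_SYS_NICE or DRM master (see amdgpu_ctx_priority_permit below). */
	memset(&ctx, 0, sizeof(ctx));
	ctx.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	ctx.in.priority = AMDGPU_CTX_PRIORITY_HIGH;
	drmIoctl(fd, DRM_IOCTL_AMDGPU_CTX, &ctx);
	/* ctx.out.alloc.ctx_id now identifies the new context. */

	/* DRM master only: override the priority of every context owned by
	 * the process behind an amdgpu DRM fd (fd itself here, just for the
	 * example). */
	memset(&sched, 0, sizeof(sched));
	sched.in.op = AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE;
	sched.in.fd = fd;
	sched.in.priority = AMDGPU_CTX_PRIORITY_VERY_HIGH;
	drmIoctl(fd, DRM_IOCTL_AMDGPU_SCHED, &sched);

	/* Read the new VRAM-lost counter; a change across a GPU reset means
	 * old contexts may start returning -ECANCELED on submission. */
	memset(&info, 0, sizeof(info));
	info.return_pointer = (uintptr_t)&vram_lost;
	info.return_size = sizeof(vram_lost);
	info.query = AMDGPU_INFO_VRAM_LOST_COUNTER;
	drmIoctl(fd, DRM_IOCTL_AMDGPU_INFO, &info);
}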

* 'drm-next-4.15' of git://people.freedesktop.org/~agd5f/linux: (73 commits)
  drm/amd/powerplay: Place the constant on the right side of the test
  drm/amd/powerplay: Remove useless variable
  drm/amd/powerplay: Don't cast kzalloc() return value
  drm/amdgpu: allow GTT overcommit during bind
  drm/amdgpu: linear validate first then bind to GART
  drm/amd/pp: Fix overflow when setup decf/pix/disp dpm table.
  drm/amd/pp: thermal control not enabled on vega10.
  drm/amdgpu: busywait KIQ register accessing (v4)
  drm/amdgpu: report more amdgpu_fence_info
  drm/amdgpu:don't check soft_reset for sriov
  drm/amdgpu:fix duplicated setting job's vram_lost
  drm/amdgpu:reduce wb to 512 slot
  drm/amdgpu: fix regression on SR-IOV gpu reset failed
  drm/amd/powerplay: Tidy up cz_dpm_powerup_vce()
  drm/amd/powerplay: Tidy up cz_dpm_powerdown_vce()
  drm/amd/powerplay: Tidy up cz_dpm_update_vce_dpm()
  drm/amd/powerplay: Tidy up cz_dpm_update_uvd_dpm()
  drm/amd/powerplay: Tidy up cz_dpm_powerup_uvd()
  drm/amd/powerplay: Tidy up cz_dpm_powerdown_uvd()
  drm/amd/powerplay: Tidy up cz_start_dpm()
  ...
Committed by Dave Airlie on 2017-10-20 10:47:19 +10:00 (commit 6585d4274b)
65 changed files with 12037 additions and 13397 deletions


@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_queue_mgr.o amdgpu_vf_error.o
amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \


@ -732,10 +732,14 @@ struct amdgpu_ctx {
struct amdgpu_device *adev;
struct amdgpu_queue_mgr queue_mgr;
unsigned reset_counter;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
struct dma_fence **fences;
struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
bool preamble_presented;
enum amd_sched_priority init_priority;
enum amd_sched_priority override_priority;
struct mutex lock;
};
struct amdgpu_ctx_mgr {
@ -752,13 +756,18 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
struct dma_fence *fence, uint64_t *seq);
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct amdgpu_ring *ring, uint64_t seq);
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum amd_sched_priority priority);
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
/*
* file private structure
*/
@ -770,7 +779,6 @@ struct amdgpu_fpriv {
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
u32 vram_lost_counter;
};
/*
@ -871,7 +879,7 @@ struct amdgpu_mec {
struct amdgpu_kiq {
u64 eop_gpu_addr;
struct amdgpu_bo *eop_obj;
struct mutex ring_mutex;
spinlock_t ring_lock;
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
};
@ -1035,6 +1043,10 @@ struct amdgpu_gfx {
bool in_suspend;
/* NGG */
struct amdgpu_ngg ngg;
/* pipe reservation */
struct mutex pipe_reserve_mutex;
DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@ -1113,6 +1125,7 @@ struct amdgpu_job {
uint32_t gds_base, gds_size;
uint32_t gws_base, gws_size;
uint32_t oa_base, oa_size;
uint32_t vram_lost_counter;
/* user fence handling */
uint64_t uf_addr;
@ -1138,7 +1151,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
/*
* Writeback
*/
#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */
#define AMDGPU_MAX_WB 512 /* Reserve at most 512 WB slots for amdgpu-owned rings. */
struct amdgpu_wb {
struct amdgpu_bo *wb_obj;
@ -1378,6 +1391,18 @@ struct amdgpu_atcs {
struct amdgpu_atcs_functions functions;
};
/*
* Firmware VRAM reservation
*/
struct amdgpu_fw_vram_usage {
u64 start_offset;
u64 size;
struct amdgpu_bo *reserved_bo;
void *va;
};
int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev);
/*
* CGS
*/
@ -1582,6 +1607,8 @@ struct amdgpu_device {
struct delayed_work late_init_work;
struct amdgpu_virt virt;
/* firmware VRAM reservation */
struct amdgpu_fw_vram_usage fw_vram_usage;
/* link all shadow bo */
struct list_head shadow_list;
@ -1833,8 +1860,6 @@ static inline bool amdgpu_has_atpx(void) { return false; }
extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
extern const int amdgpu_max_kms_ioctl;
bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv);
int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
void amdgpu_driver_unload_kms(struct drm_device *dev);
void amdgpu_driver_lastclose_kms(struct drm_device *dev);


@ -1807,6 +1807,8 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
uint16_t data_offset;
int usage_bytes = 0;
struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage;
u64 start_addr;
u64 size;
if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset);
@ -1815,7 +1817,21 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware),
le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb));
usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
start_addr = firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware;
size = firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb;
if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
(uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
/* Firmware requests a VRAM reservation for SR-IOV */
adev->fw_vram_usage.start_offset = (start_addr &
(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
adev->fw_vram_usage.size = size << 10;
/* Use the default scratch size */
usage_bytes = 0;
} else {
usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
}
}
ctx->scratch_size_bytes = 0;
if (usage_bytes == 0)


@ -90,12 +90,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
goto free_chunk;
}
mutex_lock(&p->ctx->lock);
/* get chunks */
chunk_array_user = u64_to_user_ptr(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user,
sizeof(uint64_t)*cs->in.num_chunks)) {
ret = -EFAULT;
goto put_ctx;
goto free_chunk;
}
p->nchunks = cs->in.num_chunks;
@ -103,7 +105,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
GFP_KERNEL);
if (!p->chunks) {
ret = -ENOMEM;
goto put_ctx;
goto free_chunk;
}
for (i = 0; i < p->nchunks; i++) {
@ -170,6 +172,11 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
if (ret)
goto free_all_kdata;
if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
ret = -ECANCELED;
goto free_all_kdata;
}
if (p->uf_entry.robj)
p->job->uf_addr = uf_offset;
kfree(chunk_array);
@ -183,8 +190,6 @@ free_partial_kdata:
kfree(p->chunks);
p->chunks = NULL;
p->nchunks = 0;
put_ctx:
amdgpu_ctx_put(p->ctx);
free_chunk:
kfree(chunk_array);
@ -705,7 +710,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
list_for_each_entry(e, &p->validated, tv.head) {
struct reservation_object *resv = e->robj->tbo.resv;
r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp);
r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
amdgpu_bo_explicit_sync(e->robj));
if (r)
return r;
@ -736,8 +742,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
dma_fence_put(parser->fence);
if (parser->ctx)
if (parser->ctx) {
mutex_unlock(&parser->ctx->lock);
amdgpu_ctx_put(parser->ctx);
}
if (parser->bo_list)
amdgpu_bo_list_put(parser->bo_list);
@ -844,14 +852,58 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_ring *ring = p->job->ring;
int i, r;
int r;
/* Only for UVD/VCE VM emulation */
if (ring->funcs->parse_cs) {
for (i = 0; i < p->job->num_ibs; i++) {
r = amdgpu_ring_parse_cs(ring, p, i);
if (p->job->ring->funcs->parse_cs) {
unsigned i, j;
for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
struct drm_amdgpu_cs_chunk_ib *chunk_ib;
struct amdgpu_bo_va_mapping *m;
struct amdgpu_bo *aobj = NULL;
struct amdgpu_cs_chunk *chunk;
struct amdgpu_ib *ib;
uint64_t offset;
uint8_t *kptr;
chunk = &p->chunks[i];
ib = &p->job->ibs[j];
chunk_ib = chunk->kdata;
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
continue;
r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
&aobj, &m);
if (r) {
DRM_ERROR("IB va_start is invalid\n");
return r;
}
if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
DRM_ERROR("IB va_start+ib_bytes is invalid\n");
return -EINVAL;
}
/* the IB should be reserved at this point */
r = amdgpu_bo_kmap(aobj, (void **)&kptr);
if (r) {
return r;
}
offset = m->start * AMDGPU_GPU_PAGE_SIZE;
kptr += chunk_ib->va_start - offset;
memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
amdgpu_bo_kunmap(aobj);
r = amdgpu_ring_parse_cs(ring, p, j);
if (r)
return r;
j++;
}
}
@ -918,54 +970,18 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
parser->job->ring = ring;
if (ring->funcs->parse_cs) {
struct amdgpu_bo_va_mapping *m;
struct amdgpu_bo *aobj = NULL;
uint64_t offset;
uint8_t *kptr;
r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
&aobj, &m);
if (r) {
DRM_ERROR("IB va_start is invalid\n");
return r;
}
if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
DRM_ERROR("IB va_start+ib_bytes is invalid\n");
return -EINVAL;
}
/* the IB should be reserved at this point */
r = amdgpu_bo_kmap(aobj, (void **)&kptr);
if (r) {
return r;
}
offset = m->start * AMDGPU_GPU_PAGE_SIZE;
kptr += chunk_ib->va_start - offset;
r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
}
memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
amdgpu_bo_kunmap(aobj);
} else {
r = amdgpu_ib_get(adev, vm, 0, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
}
r = amdgpu_ib_get(adev, vm,
ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
}
ib->gpu_addr = chunk_ib->va_start;
ib->length_dw = chunk_ib->ib_bytes / 4;
ib->flags = chunk_ib->flags;
j++;
}
@ -975,7 +991,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
return -EINVAL;
return 0;
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
}
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@ -1176,6 +1192,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
job->uf_sequence = seq;
amdgpu_job_free_resources(job);
amdgpu_ring_priority_get(job->ring,
amd_sched_get_job_priority(&job->base));
trace_amdgpu_cs_ioctl(job);
amd_sched_entity_push_job(&job->base);
@ -1189,7 +1207,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
union drm_amdgpu_cs *cs = data;
struct amdgpu_cs_parser parser = {};
bool reserved_buffers = false;
@ -1197,8 +1214,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (!adev->accel_working)
return -EBUSY;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
parser.adev = adev;
parser.filp = filp;
@ -1209,6 +1224,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
goto out;
}
r = amdgpu_cs_ib_fill(adev, &parser);
if (r)
goto out;
r = amdgpu_cs_parser_bos(&parser, data);
if (r) {
if (r == -ENOMEM)
@ -1219,9 +1238,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
}
reserved_buffers = true;
r = amdgpu_cs_ib_fill(adev, &parser);
if (r)
goto out;
r = amdgpu_cs_dependencies(adev, &parser);
if (r) {
@ -1257,16 +1273,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
{
union drm_amdgpu_wait_cs *wait = data;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
struct amdgpu_ring *ring = NULL;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
long r;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
if (ctx == NULL)
return -EINVAL;
@ -1284,6 +1296,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
r = PTR_ERR(fence);
else if (fence) {
r = dma_fence_wait_timeout(fence, true, timeout);
if (r > 0 && fence->error)
r = fence->error;
dma_fence_put(fence);
} else
r = 1;
@ -1335,16 +1349,12 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
union drm_amdgpu_fence_to_handle *info = data;
struct dma_fence *fence;
struct drm_syncobj *syncobj;
struct sync_file *sync_file;
int fd, r;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
if (IS_ERR(fence))
return PTR_ERR(fence);
@ -1425,6 +1435,9 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
if (r == 0)
break;
if (fence->error)
return fence->error;
}
memset(wait, 0, sizeof(*wait));
@ -1485,7 +1498,7 @@ out:
wait->out.status = (r > 0);
wait->out.first_signaled = first;
/* set return value 0 to indicate success */
r = 0;
r = array[first]->error;
err_free_fence_array:
for (i = 0; i < fence_count; i++)
@ -1506,15 +1519,12 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
union drm_amdgpu_wait_fences *wait = data;
uint32_t fence_count = wait->in.fence_count;
struct drm_amdgpu_fence *fences_user;
struct drm_amdgpu_fence *fences;
int r;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
/* Get the fences from userspace */
fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
GFP_KERNEL);
@ -1572,14 +1582,14 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
return -EINVAL;
r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem);
if (unlikely(r))
return r;
if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false,
false);
if (r)
return r;
}
if ((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
return 0;
(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
return ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, false);
return amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem);
}


@ -23,13 +23,41 @@
*/
#include <drm/drmP.h>
#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum amd_sched_priority priority)
{
/* NORMAL and below are accessible by everyone */
if (priority <= AMD_SCHED_PRIORITY_NORMAL)
return 0;
if (capable(CAP_SYS_NICE))
return 0;
if (drm_is_current_master(filp))
return 0;
return -EACCES;
}
static int amdgpu_ctx_init(struct amdgpu_device *adev,
enum amd_sched_priority priority,
struct drm_file *filp,
struct amdgpu_ctx *ctx)
{
unsigned i, j;
int r;
if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX)
return -EINVAL;
r = amdgpu_ctx_priority_permit(filp, priority);
if (r)
return r;
memset(ctx, 0, sizeof(*ctx));
ctx->adev = adev;
kref_init(&ctx->refcount);
@ -39,19 +67,24 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
if (!ctx->fences)
return -ENOMEM;
mutex_init(&ctx->lock);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
ctx->rings[i].sequence = 1;
ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
}
ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
ctx->init_priority = priority;
ctx->override_priority = AMD_SCHED_PRIORITY_UNSET;
/* create context entity for each ring */
for (i = 0; i < adev->num_rings; i++) {
struct amdgpu_ring *ring = adev->rings[i];
struct amd_sched_rq *rq;
rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
rq = &ring->sched.sched_rq[priority];
if (ring == &adev->gfx.kiq.ring)
continue;
@ -96,10 +129,14 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
&ctx->rings[i].entity);
amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
mutex_destroy(&ctx->lock);
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
struct drm_file *filp,
enum amd_sched_priority priority,
uint32_t *id)
{
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
@ -117,8 +154,9 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
kfree(ctx);
return r;
}
*id = (uint32_t)r;
r = amdgpu_ctx_init(adev, ctx);
r = amdgpu_ctx_init(adev, priority, filp, ctx);
if (r) {
idr_remove(&mgr->ctx_handles, *id);
*id = 0;
@ -193,6 +231,7 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
{
int r;
uint32_t id;
enum amd_sched_priority priority;
union drm_amdgpu_ctx *args = data;
struct amdgpu_device *adev = dev->dev_private;
@ -200,10 +239,16 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
r = 0;
id = args->in.ctx_id;
priority = amdgpu_to_sched_priority(args->in.priority);
/* For backwards compatibility reasons, we need to accept
* ioctls with garbage in the priority field */
if (priority == AMD_SCHED_PRIORITY_INVALID)
priority = AMD_SCHED_PRIORITY_NORMAL;
switch (args->in.op) {
case AMDGPU_CTX_OP_ALLOC_CTX:
r = amdgpu_ctx_alloc(adev, fpriv, &id);
r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
args->out.alloc.ctx_id = id;
break;
case AMDGPU_CTX_OP_FREE_CTX:
@ -256,12 +301,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
idx = seq & (amdgpu_sched_jobs - 1);
other = cring->fences[idx];
if (other) {
signed long r;
r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
if (r < 0)
return r;
}
if (other)
BUG_ON(!dma_fence_is_signaled(other));
dma_fence_get(fence);
@ -305,6 +346,51 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
return fence;
}
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum amd_sched_priority priority)
{
int i;
struct amdgpu_device *adev = ctx->adev;
struct amd_sched_rq *rq;
struct amd_sched_entity *entity;
struct amdgpu_ring *ring;
enum amd_sched_priority ctx_prio;
ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == AMD_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
for (i = 0; i < adev->num_rings; i++) {
ring = adev->rings[i];
entity = &ctx->rings[i].entity;
rq = &ring->sched.sched_rq[ctx_prio];
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
continue;
amd_sched_entity_set_rq(entity, rq);
}
}
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
{
struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
struct dma_fence *other = cring->fences[idx];
if (other) {
signed long r;
r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
if (r < 0) {
DRM_ERROR("Error (%ld) waiting for fence!\n", r);
return r;
}
}
return 0;
}
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
mutex_init(&mgr->lock);


@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
{
uint32_t ret;
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
BUG_ON(in_interrupt());
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
return amdgpu_virt_kiq_rreg(adev, reg);
}
if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
adev->last_mm_index = v;
}
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
BUG_ON(in_interrupt());
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
return amdgpu_virt_kiq_wreg(adev, reg, v);
}
if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
@ -657,6 +653,81 @@ void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc)
mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}
/*
* Firmware Reservation functions
*/
/**
* amdgpu_fw_reserve_vram_fini - free firmware-reserved VRAM
*
* @adev: amdgpu_device pointer
*
* Free the VRAM reserved for firmware, if it has been reserved.
*/
void amdgpu_fw_reserve_vram_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo,
NULL, &adev->fw_vram_usage.va);
}
/**
* amdgpu_fw_reserve_vram_init - create the BO for the firmware VRAM reservation
*
* @adev: amdgpu_device pointer
*
* Create and pin a BO covering the VRAM region requested by firmware.
*/
int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev)
{
int r = 0;
u64 gpu_addr;
u64 vram_size = adev->mc.visible_vram_size;
adev->fw_vram_usage.va = NULL;
adev->fw_vram_usage.reserved_bo = NULL;
if (adev->fw_vram_usage.size > 0 &&
adev->fw_vram_usage.size <= vram_size) {
r = amdgpu_bo_create(adev, adev->fw_vram_usage.size,
PAGE_SIZE, true, 0,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0,
&adev->fw_vram_usage.reserved_bo);
if (r)
goto error_create;
r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
if (r)
goto error_reserve;
r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
AMDGPU_GEM_DOMAIN_VRAM,
adev->fw_vram_usage.start_offset,
(adev->fw_vram_usage.start_offset +
adev->fw_vram_usage.size), &gpu_addr);
if (r)
goto error_pin;
r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
&adev->fw_vram_usage.va);
if (r)
goto error_kmap;
amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
}
return r;
error_kmap:
amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo);
error_pin:
amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
error_reserve:
amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo);
error_create:
adev->fw_vram_usage.va = NULL;
adev->fw_vram_usage.reserved_bo = NULL;
return r;
}
/*
* GPU helpers function.
*/
@ -1604,7 +1675,6 @@ static int amdgpu_init(struct amdgpu_device *adev)
return r;
}
adev->ip_blocks[i].status.sw = true;
/* need to do gmc hw init early so we can allocate gpu mem */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
r = amdgpu_vram_scratch_init(adev);
@ -1635,11 +1705,6 @@ static int amdgpu_init(struct amdgpu_device *adev)
}
}
mutex_lock(&adev->firmware.mutex);
if (amdgpu_ucode_init_bo(adev))
adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
mutex_unlock(&adev->firmware.mutex);
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.sw)
continue;
@ -1775,8 +1840,6 @@ static int amdgpu_fini(struct amdgpu_device *adev)
adev->ip_blocks[i].status.hw = false;
}
if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
amdgpu_ucode_fini_bo(adev);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.sw)
@ -2019,6 +2082,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->vm_manager.vm_pte_num_rings = 0;
adev->gart.gart_funcs = NULL;
adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
adev->smc_rreg = &amdgpu_invalid_rreg;
adev->smc_wreg = &amdgpu_invalid_wreg;
@ -2047,6 +2111,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->pm.mutex);
mutex_init(&adev->gfx.gpu_clock_mutex);
mutex_init(&adev->srbm_mutex);
mutex_init(&adev->gfx.pipe_reserve_mutex);
mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
@ -2223,6 +2288,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
amdgpu_fbdev_init(adev);
r = amdgpu_pm_sysfs_init(adev);
@ -2300,6 +2368,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
/* evict vram memory */
amdgpu_bo_evict_vram(adev);
amdgpu_ib_pool_fini(adev);
amdgpu_fw_reserve_vram_fini(adev);
amdgpu_fence_driver_fini(adev);
amdgpu_fbdev_fini(adev);
r = amdgpu_fini(adev);
@ -2552,6 +2621,9 @@ static bool amdgpu_check_soft_reset(struct amdgpu_device *adev)
int i;
bool asic_hang = false;
if (amdgpu_sriov_vf(adev))
return true;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;


@ -71,9 +71,11 @@
* - 3.19.0 - Add support for UVD MJPEG decode
* - 3.20.0 - Add support for local BOs
* - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
* - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
* - 3.23.0 - Add query for VRAM lost counter
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 21
#define KMS_DRIVER_MINOR 23
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;


@ -168,6 +168,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
return 0;
}
/**
* amdgpu_fence_emit_polling - emit a fence on the requested ring
*
* @ring: ring the fence is associated with
* @s: resulting sequence number
*
* Emits a fence command on the requested ring (all asics).
* Used for fence polling.
* Returns 0 on success, -EINVAL if no sequence pointer is provided.
*/
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
{
uint32_t seq;
if (!s)
return -EINVAL;
seq = ++ring->fence_drv.sync_seq;
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, AMDGPU_FENCE_FLAG_INT);
*s = seq;
return 0;
}
/**
* amdgpu_fence_schedule_fallback - schedule fallback check
*
@ -281,6 +307,30 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
return r;
}
/**
* amdgpu_fence_wait_polling - busy wait for a given sequence number
*
* @ring: ring the fence is associated with
* @wait_seq: sequence number to wait for
* @timeout: timeout for the wait, in usecs
*
* Busy-waits until the fence on the requested ring reaches @wait_seq.
* Returns the remaining timeout if the sequence was reached, or 0 on timeout.
*/
signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
signed long timeout)
{
uint32_t seq;
do {
seq = amdgpu_fence_read(ring);
udelay(5);
timeout -= 5;
} while ((int32_t)(wait_seq - seq) > 0 && timeout > 0);
return timeout > 0 ? timeout : 0;
}
/**
* amdgpu_fence_count_emitted - get the count of emitted fences
*
@ -641,6 +691,19 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
atomic_read(&ring->fence_drv.last_seq));
seq_printf(m, "Last emitted 0x%08x\n",
ring->fence_drv.sync_seq);
if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
continue;
/* set in CP_VMID_PREEMPT and preemption occurred */
seq_printf(m, "Last preempted 0x%08x\n",
le32_to_cpu(*(ring->fence_drv.cpu_addr + 2)));
/* set in CP_VMID_RESET and reset occurred */
seq_printf(m, "Last reset 0x%08x\n",
le32_to_cpu(*(ring->fence_drv.cpu_addr + 4)));
/* Both preemption and reset occurred */
seq_printf(m, "Last both 0x%08x\n",
le32_to_cpu(*(ring->fence_drv.cpu_addr + 6)));
}
return 0;
}


@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_VRAM_CLEARED |
AMDGPU_GEM_CREATE_VM_ALWAYS_VALID))
AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
return -EINVAL;
/* reject invalid gem domains */
@ -577,11 +579,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->operation);
return -EINVAL;
}
if ((args->operation == AMDGPU_VA_OP_MAP) ||
(args->operation == AMDGPU_VA_OP_REPLACE)) {
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
}
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&duplicates);


@ -201,7 +201,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
int r = 0;
mutex_init(&kiq->ring_mutex);
spin_lock_init(&kiq->ring_lock);
r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
if (r)


@ -169,7 +169,8 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
int r;
spin_lock(&mgr->lock);
if (atomic64_read(&mgr->available) < mem->num_pages) {
if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) &&
atomic64_read(&mgr->available) < mem->num_pages) {
spin_unlock(&mgr->lock);
return 0;
}
@ -244,8 +245,9 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man,
uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man)
{
struct amdgpu_gtt_mgr *mgr = man->priv;
s64 result = man->size - atomic64_read(&mgr->available);
return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE;
return (result > 0 ? result : 0) * PAGE_SIZE;
}
/**
@ -265,7 +267,7 @@ static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man,
drm_mm_print(&mgr->mm, printer);
spin_unlock(&mgr->lock);
drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n",
drm_printf(printer, "man size:%llu pages, gtt available:%lld pages, usage:%lluMB\n",
man->size, (u64)atomic64_read(&mgr->available),
amdgpu_gtt_mgr_usage(man) >> 20);
}


@ -65,6 +65,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
amdgpu_sync_create(&(*job)->sync);
amdgpu_sync_create(&(*job)->dep_sync);
amdgpu_sync_create(&(*job)->sched_sync);
(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
return 0;
}
@ -103,6 +104,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
{
struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job));
dma_fence_put(job->fence);
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->dep_sync);
@ -139,6 +141,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
job->fence_ctx = entity->fence_context;
*f = dma_fence_get(&job->base.s_fence->finished);
amdgpu_job_free_resources(job);
amdgpu_ring_priority_get(job->ring,
amd_sched_get_job_priority(&job->base));
amd_sched_entity_push_job(&job->base);
return 0;
@ -177,8 +181,8 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
{
struct dma_fence *fence = NULL;
struct amdgpu_device *adev;
struct amdgpu_job *job;
struct amdgpu_fpriv *fpriv = NULL;
int r;
if (!sched_job) {
@ -186,23 +190,25 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
return NULL;
}
job = to_amdgpu_job(sched_job);
adev = job->adev;
BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
trace_amdgpu_sched_run_job(job);
if (job->vm)
fpriv = container_of(job->vm, struct amdgpu_fpriv, vm);
/* skip ib schedule when vram is lost */
if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv))
if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) {
dma_fence_set_error(&job->base.s_fence->finished, -ECANCELED);
DRM_ERROR("Skip scheduling IBs!\n");
else {
r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence);
} else {
r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job,
&fence);
if (r)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
}
/* if gpu reset, hw fence will be replaced here */
dma_fence_put(job->fence);
job->fence = dma_fence_get(fence);
amdgpu_job_free_resources(job);
return fence;
}


@ -28,6 +28,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include <drm/amdgpu_drm.h>
#include "amdgpu_sched.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
@ -269,7 +270,6 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct drm_amdgpu_info *info = data;
struct amdgpu_mode_info *minfo = &adev->mode_info;
void __user *out = (void __user *)(uintptr_t)info->return_pointer;
@ -282,8 +282,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
if (!info->return_size || !info->return_pointer)
return -EINVAL;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
@ -765,6 +763,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
}
return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
}
case AMDGPU_INFO_VRAM_LOST_COUNTER:
ui32 = atomic_read(&adev->vram_lost_counter);
return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
@ -791,12 +792,6 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev)
vga_switcheroo_process_delayed_switch();
}
bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv)
{
return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter);
}
/**
* amdgpu_driver_open_kms - drm callback for open
*
@ -853,7 +848,6 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
file_priv->driver_priv = fpriv;
out_suspend:
@ -1023,6 +1017,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER),
DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
/* KMS */


@ -40,9 +40,7 @@
static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
struct amdgpu_bo *bo;
bo = container_of(tbo, struct amdgpu_bo, tbo);
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
amdgpu_bo_kunmap(bo);
@ -884,7 +882,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
return;
abo = container_of(bo, struct amdgpu_bo, tbo);
abo = ttm_to_amdgpu_bo(bo);
amdgpu_vm_bo_invalidate(adev, abo, evict);
amdgpu_bo_kunmap(abo);
@ -911,7 +909,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
return 0;
abo = container_of(bo, struct amdgpu_bo, tbo);
abo = ttm_to_amdgpu_bo(bo);
/* Remember that this BO was accessed by the CPU */
abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;


@ -94,6 +94,11 @@ struct amdgpu_bo {
};
};
static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
{
return container_of(tbo, struct amdgpu_bo, tbo);
}
/**
* amdgpu_mem_type_to_domain - return domain corresponding to mem_type
* @mem_type: ttm memory type
@ -188,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
}
}
/**
* amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
*/
static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
{
return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
}
int amdgpu_bo_create(struct amdgpu_device *adev,
unsigned long size, int byte_align,
bool kernel, u32 domain, u64 flags,


@ -145,6 +145,8 @@ static int amdgpu_pp_hw_init(void *handle)
int ret = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
amdgpu_ucode_init_bo(adev);
if (adev->powerplay.ip_funcs->hw_init)
ret = adev->powerplay.ip_funcs->hw_init(
@ -162,6 +164,9 @@ static int amdgpu_pp_hw_fini(void *handle)
ret = adev->powerplay.ip_funcs->hw_fini(
adev->powerplay.pp_handle);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
amdgpu_ucode_fini_bo(adev);
return ret;
}


@ -411,6 +411,13 @@ static int psp_hw_init(void *handle)
return 0;
mutex_lock(&adev->firmware.mutex);
/*
* This sequence is only used once, during hw_init; it is not
* needed on resume.
*/
ret = amdgpu_ucode_init_bo(adev);
if (ret)
goto failed;
ret = psp_load_fw(adev);
if (ret) {
@ -435,6 +442,8 @@ static int psp_hw_fini(void *handle)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
amdgpu_ucode_fini_bo(adev);
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);


@ -154,6 +154,75 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
ring->funcs->end_use(ring);
}
/**
* amdgpu_ring_priority_put - restore a ring's priority
*
* @ring: amdgpu_ring structure holding the information
* @priority: target priority
*
* Release a request for executing at @priority
*/
void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
enum amd_sched_priority priority)
{
int i;
if (!ring->funcs->set_priority)
return;
if (atomic_dec_return(&ring->num_jobs[priority]) > 0)
return;
/* no need to restore if the job is already at the lowest priority */
if (priority == AMD_SCHED_PRIORITY_NORMAL)
return;
mutex_lock(&ring->priority_mutex);
/* something higher prio is executing, no need to decay */
if (ring->priority > priority)
goto out_unlock;
/* decay priority to the next level with a job available */
for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) {
if (i == AMD_SCHED_PRIORITY_NORMAL
|| atomic_read(&ring->num_jobs[i])) {
ring->priority = i;
ring->funcs->set_priority(ring, i);
break;
}
}
out_unlock:
mutex_unlock(&ring->priority_mutex);
}
/**
* amdgpu_ring_priority_get - change the ring's priority
*
* @ring: amdgpu_ring structure holding the information
* @priority: target priority
*
* Request a ring's priority to be raised to @priority (refcounted).
*/
void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
enum amd_sched_priority priority)
{
if (!ring->funcs->set_priority)
return;
atomic_inc(&ring->num_jobs[priority]);
mutex_lock(&ring->priority_mutex);
if (priority <= ring->priority)
goto out_unlock;
ring->priority = priority;
ring->funcs->set_priority(ring, priority);
out_unlock:
mutex_unlock(&ring->priority_mutex);
}
/**
* amdgpu_ring_init - init driver ring struct.
*
@ -169,7 +238,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned max_dw, struct amdgpu_irq_src *irq_src,
unsigned irq_type)
{
int r;
int r, i;
int sched_hw_submission = amdgpu_sched_hw_submission;
/* Set the hw submission limit higher for KIQ because
@ -247,9 +316,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
}
ring->max_dw = max_dw;
ring->priority = AMD_SCHED_PRIORITY_NORMAL;
mutex_init(&ring->priority_mutex);
INIT_LIST_HEAD(&ring->lru_list);
amdgpu_ring_lru_touch(adev, ring);
for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i)
atomic_set(&ring->num_jobs[i], 0);
if (amdgpu_debugfs_ring_init(adev, ring)) {
DRM_ERROR("Failed to register debugfs file for rings !\n");
}


@ -24,6 +24,7 @@
#ifndef __AMDGPU_RING_H__
#define __AMDGPU_RING_H__
#include <drm/amdgpu_drm.h>
#include "gpu_scheduler.h"
/* max number of rings */
@ -56,6 +57,7 @@ struct amdgpu_device;
struct amdgpu_ring;
struct amdgpu_ib;
struct amdgpu_cs_parser;
struct amdgpu_job;
/*
* Fences.
@ -88,8 +90,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
void amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
signed long timeout);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
/*
@ -147,6 +153,9 @@ struct amdgpu_ring_funcs {
void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
/* priority functions */
void (*set_priority) (struct amdgpu_ring *ring,
enum amd_sched_priority priority);
};
struct amdgpu_ring {
@ -187,6 +196,12 @@ struct amdgpu_ring {
volatile u32 *cond_exe_cpu_addr;
unsigned vm_inv_eng;
bool has_compute_vm_bug;
atomic_t num_jobs[AMD_SCHED_PRIORITY_MAX];
struct mutex priority_mutex;
/* protected by priority_mutex */
int priority;
#if defined(CONFIG_DEBUG_FS)
struct dentry *ent;
#endif
@ -197,6 +212,10 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_ring_commit(struct amdgpu_ring *ring);
void amdgpu_ring_undo(struct amdgpu_ring *ring);
void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
enum amd_sched_priority priority);
void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
enum amd_sched_priority priority);
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned ring_size, struct amdgpu_irq_src *irq_src,
unsigned irq_type);


@ -0,0 +1,109 @@
/*
* Copyright 2017 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Andres Rodriguez <andresx7@gmail.com>
*/
#include <linux/fdtable.h>
#include <linux/pid.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
{
switch (amdgpu_priority) {
case AMDGPU_CTX_PRIORITY_VERY_HIGH:
return AMD_SCHED_PRIORITY_HIGH_HW;
case AMDGPU_CTX_PRIORITY_HIGH:
return AMD_SCHED_PRIORITY_HIGH_SW;
case AMDGPU_CTX_PRIORITY_NORMAL:
return AMD_SCHED_PRIORITY_NORMAL;
case AMDGPU_CTX_PRIORITY_LOW:
case AMDGPU_CTX_PRIORITY_VERY_LOW:
return AMD_SCHED_PRIORITY_LOW;
case AMDGPU_CTX_PRIORITY_UNSET:
return AMD_SCHED_PRIORITY_UNSET;
default:
WARN(1, "Invalid context priority %d\n", amdgpu_priority);
return AMD_SCHED_PRIORITY_INVALID;
}
}
static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
int fd,
enum amd_sched_priority priority)
{
struct file *filp = fcheck(fd);
struct drm_file *file;
struct pid *pid;
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx *ctx;
uint32_t id;
if (!filp)
return -EINVAL;
pid = get_pid(((struct drm_file *)filp->private_data)->pid);
mutex_lock(&adev->ddev->filelist_mutex);
list_for_each_entry(file, &adev->ddev->filelist, lhead) {
if (file->pid != pid)
continue;
fpriv = file->driver_priv;
idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
amdgpu_ctx_priority_override(ctx, priority);
}
mutex_unlock(&adev->ddev->filelist_mutex);
put_pid(pid);
return 0;
}
int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
union drm_amdgpu_sched *args = data;
struct amdgpu_device *adev = dev->dev_private;
enum amd_sched_priority priority;
int r;
priority = amdgpu_to_sched_priority(args->in.priority);
if (args->in.flags || priority == AMD_SCHED_PRIORITY_INVALID)
return -EINVAL;
switch (args->in.op) {
case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE:
r = amdgpu_sched_process_priority_override(adev,
args->in.fd,
priority);
break;
default:
DRM_ERROR("Invalid sched op specified: %d\n", args->in.op);
r = -EINVAL;
break;
}
return r;
}


@ -1,5 +1,5 @@
/*
* Copyright 2015 Advanced Micro Devices, Inc.
* Copyright 2017 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -19,22 +19,16 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Andres Rodriguez <andresx7@gmail.com>
*/
#ifndef _ICELAND_SMC_H
#define _ICELAND_SMC_H
#include "smumgr.h"
#ifndef __AMDGPU_SCHED_H__
#define __AMDGPU_SCHED_H__
#include <drm/drmP.h>
int iceland_populate_all_graphic_levels(struct pp_hwmgr *hwmgr);
int iceland_populate_all_memory_levels(struct pp_hwmgr *hwmgr);
int iceland_init_smc_table(struct pp_hwmgr *hwmgr);
int iceland_thermal_setup_fan_table(struct pp_hwmgr *hwmgr);
int iceland_update_sclk_threshold(struct pp_hwmgr *hwmgr);
uint32_t iceland_get_offsetof(uint32_t type, uint32_t member);
uint32_t iceland_get_mac_definition(uint32_t value);
int iceland_process_firmware_header(struct pp_hwmgr *hwmgr);
int iceland_initialize_mc_reg_table(struct pp_hwmgr *hwmgr);
bool iceland_is_dpm_running(struct pp_hwmgr *hwmgr);
#endif
enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority);
int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
#endif // __AMDGPU_SCHED_H__


@ -169,14 +169,14 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
*
* @sync: sync object to add fences from reservation object to
* @resv: reservation object with embedded fence
* @shared: true if we should only sync to the exclusive fence
* @explicit_sync: true if we should only sync to the exclusive fence
*
* Sync to the fence
*/
int amdgpu_sync_resv(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
struct reservation_object *resv,
void *owner)
void *owner, bool explicit_sync)
{
struct reservation_object_list *flist;
struct dma_fence *f;
@ -191,6 +191,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
f = reservation_object_get_excl(resv);
r = amdgpu_sync_fence(adev, sync, f);
if (explicit_sync)
return r;
flist = reservation_object_get_list(resv);
if (!flist || r)
return r;


@ -45,7 +45,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
int amdgpu_sync_resv(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
struct reservation_object *resv,
void *owner);
void *owner,
bool explicit_sync);
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);


@ -44,6 +44,7 @@
#include <linux/debugfs.h>
#include <linux/iommu.h>
#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
#include "bif/bif_4_1_d.h"
@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
placement->num_busy_placement = 1;
return;
}
abo = container_of(bo, struct amdgpu_bo, tbo);
abo = ttm_to_amdgpu_bo(bo);
switch (bo->mem.mem_type) {
case TTM_PL_VRAM:
if (adev->mman.buffer_funcs &&
@ -257,7 +258,7 @@ gtt:
static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
if (amdgpu_ttm_tt_get_usermm(bo->ttm))
return -EPERM;
@ -289,97 +290,177 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
return addr;
}
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
bool evict, bool no_wait_gpu,
struct ttm_mem_reg *new_mem,
struct ttm_mem_reg *old_mem)
/**
* amdgpu_find_mm_node - find the drm_mm_node containing @offset
*
* Returns the drm_mm_node that covers @offset and adjusts @offset to be
* relative to the start of that node.
*/
static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
unsigned long *offset)
{
struct drm_mm_node *mm_node = mem->mm_node;
while (*offset >= (mm_node->size << PAGE_SHIFT)) {
*offset -= (mm_node->size << PAGE_SHIFT);
++mm_node;
}
return mm_node;
}
/**
* amdgpu_copy_ttm_mem_to_mem - Helper function for copy
*
* The function copies @size bytes from {src->mem + src->offset} to
* {dst->mem + dst->offset}. src->bo and dst->bo can be the same BO for a
* move, or different BOs for a BO-to-BO copy.
*
* @f: Returns the last fence if multiple jobs are submitted.
*/
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
struct amdgpu_copy_mem *src,
struct amdgpu_copy_mem *dst,
uint64_t size,
struct reservation_object *resv,
struct dma_fence **f)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct drm_mm_node *old_mm, *new_mm;
uint64_t old_start, old_size, new_start, new_size;
unsigned long num_pages;
struct drm_mm_node *src_mm, *dst_mm;
uint64_t src_node_start, dst_node_start, src_node_size,
dst_node_size, src_page_offset, dst_page_offset;
struct dma_fence *fence = NULL;
int r;
BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
int r = 0;
const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
AMDGPU_GPU_PAGE_SIZE);
if (!ring->ready) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
}
old_mm = old_mem->mm_node;
old_size = old_mm->size;
old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
src->offset;
src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
src_page_offset = src_node_start & (PAGE_SIZE - 1);
new_mm = new_mem->mm_node;
new_size = new_mm->size;
new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
dst->offset;
dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
num_pages = new_mem->num_pages;
mutex_lock(&adev->mman.gtt_window_lock);
while (num_pages) {
unsigned long cur_pages = min(min(old_size, new_size),
(u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
uint64_t from = old_start, to = new_start;
while (size) {
unsigned long cur_size;
uint64_t from = src_node_start, to = dst_node_start;
struct dma_fence *next;
if (old_mem->mem_type == TTM_PL_TT &&
!amdgpu_gtt_mgr_is_allocated(old_mem)) {
r = amdgpu_map_buffer(bo, old_mem, cur_pages,
old_start, 0, ring, &from);
/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
* begins at an offset, then adjust the size accordingly
*/
cur_size = min3(min(src_node_size, dst_node_size), size,
GTT_MAX_BYTES);
if (cur_size + src_page_offset > GTT_MAX_BYTES ||
cur_size + dst_page_offset > GTT_MAX_BYTES)
cur_size -= max(src_page_offset, dst_page_offset);
/* Map only what needs to be accessed. Map src to window 0 and
* dst to window 1
*/
if (src->mem->mem_type == TTM_PL_TT &&
!amdgpu_gtt_mgr_is_allocated(src->mem)) {
r = amdgpu_map_buffer(src->bo, src->mem,
PFN_UP(cur_size + src_page_offset),
src_node_start, 0, ring,
&from);
if (r)
goto error;
/* Adjust the offset because amdgpu_map_buffer returns
* start of mapped page
*/
from += src_page_offset;
}
if (new_mem->mem_type == TTM_PL_TT &&
!amdgpu_gtt_mgr_is_allocated(new_mem)) {
r = amdgpu_map_buffer(bo, new_mem, cur_pages,
new_start, 1, ring, &to);
if (dst->mem->mem_type == TTM_PL_TT &&
!amdgpu_gtt_mgr_is_allocated(dst->mem)) {
r = amdgpu_map_buffer(dst->bo, dst->mem,
PFN_UP(cur_size + dst_page_offset),
dst_node_start, 1, ring,
&to);
if (r)
goto error;
to += dst_page_offset;
}
r = amdgpu_copy_buffer(ring, from, to,
cur_pages * PAGE_SIZE,
bo->resv, &next, false, true);
r = amdgpu_copy_buffer(ring, from, to, cur_size,
resv, &next, false, true);
if (r)
goto error;
dma_fence_put(fence);
fence = next;
num_pages -= cur_pages;
if (!num_pages)
size -= cur_size;
if (!size)
break;
old_size -= cur_pages;
if (!old_size) {
old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
old_size = old_mm->size;
src_node_size -= cur_size;
if (!src_node_size) {
src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
src->mem);
src_node_size = (src_mm->size << PAGE_SHIFT);
} else {
old_start += cur_pages * PAGE_SIZE;
src_node_start += cur_size;
src_page_offset = src_node_start & (PAGE_SIZE - 1);
}
new_size -= cur_pages;
if (!new_size) {
new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
new_size = new_mm->size;
dst_node_size -= cur_size;
if (!dst_node_size) {
dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
dst->mem);
dst_node_size = (dst_mm->size << PAGE_SHIFT);
} else {
new_start += cur_pages * PAGE_SIZE;
dst_node_start += cur_size;
dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
}
}
error:
mutex_unlock(&adev->mman.gtt_window_lock);
if (f)
*f = dma_fence_get(fence);
dma_fence_put(fence);
return r;
}
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
bool evict, bool no_wait_gpu,
struct ttm_mem_reg *new_mem,
struct ttm_mem_reg *old_mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_copy_mem src, dst;
struct dma_fence *fence = NULL;
int r;
src.bo = bo;
dst.bo = bo;
src.mem = old_mem;
dst.mem = new_mem;
src.offset = 0;
dst.offset = 0;
r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
new_mem->num_pages << PAGE_SHIFT,
bo->resv, &fence);
if (r)
goto error;
r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
dma_fence_put(fence);
return r;
error:
mutex_unlock(&adev->mman.gtt_window_lock);
if (fence)
dma_fence_wait(fence, false);
dma_fence_put(fence);
@ -484,7 +565,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
int r;
/* Can't move a pinned BO */
abo = container_of(bo, struct amdgpu_bo, tbo);
abo = ttm_to_amdgpu_bo(bo);
if (WARN_ON_ONCE(abo->pin_count > 0))
return -EINVAL;
@ -582,13 +663,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long page_offset)
{
struct drm_mm_node *mm = bo->mem.mm_node;
uint64_t size = mm->size;
uint64_t offset = page_offset;
struct drm_mm_node *mm;
unsigned long offset = (page_offset << PAGE_SHIFT);
page_offset = do_div(offset, size);
mm += offset;
return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
mm = amdgpu_find_mm_node(&bo->mem, &offset);
return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
(offset >> PAGE_SHIFT);
}
/*
@ -1142,9 +1222,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
unsigned long offset,
void *buf, int len, int write)
{
struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
struct drm_mm_node *nodes;
uint32_t value = 0;
int ret = 0;
uint64_t pos;
@ -1153,10 +1233,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
if (bo->mem.mem_type != TTM_PL_VRAM)
return -EIO;
while (offset >= (nodes->size << PAGE_SHIFT)) {
offset -= nodes->size << PAGE_SHIFT;
++nodes;
}
nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
pos = (nodes->start << PAGE_SHIFT) + offset;
while (len && pos < adev->mc.mc_vram_size) {
@ -1255,6 +1332,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* Change the size here instead of the init above so only lpfn is affected */
amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
/*
* The VRAM reserved for firmware must be pinned to the location
* specified by firmware, so reserve it early.
*/
r = amdgpu_fw_reserve_vram_init(adev);
if (r) {
return r;
}
r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->stolen_vga_memory,
@ -1479,7 +1565,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
job->vm_needs_flush = vm_needs_flush;
if (resv) {
r = amdgpu_sync_resv(adev, &job->sync, resv,
AMDGPU_FENCE_OWNER_UNDEFINED);
AMDGPU_FENCE_OWNER_UNDEFINED,
false);
if (r) {
DRM_ERROR("sync failed (%d).\n", r);
goto error_free;
@ -1571,7 +1658,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
if (resv) {
r = amdgpu_sync_resv(adev, &job->sync, resv,
AMDGPU_FENCE_OWNER_UNDEFINED);
AMDGPU_FENCE_OWNER_UNDEFINED, false);
if (r) {
DRM_ERROR("sync failed (%d).\n", r);
goto error_free;

View File

@ -58,6 +58,12 @@ struct amdgpu_mman {
struct amd_sched_entity entity;
};
struct amdgpu_copy_mem {
struct ttm_buffer_object *bo;
struct ttm_mem_reg *mem;
unsigned long offset;
};
extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
struct reservation_object *resv,
struct dma_fence **fence, bool direct_submit,
bool vm_needs_flush);
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
struct amdgpu_copy_mem *src,
struct amdgpu_copy_mem *dst,
uint64_t size,
struct reservation_object *resv,
struct dma_fence **f);
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint64_t src_data,
struct reservation_object *resv,

View File

@ -22,7 +22,7 @@
*/
#include "amdgpu.h"
#define MAX_KIQ_REG_WAIT 100000
#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */
int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
{
@ -114,27 +114,24 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
signed long r;
uint32_t val;
struct dma_fence *f;
uint32_t val, seq;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_rreg);
mutex_lock(&kiq->ring_mutex);
spin_lock(&kiq->ring_lock);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_rreg(ring, reg);
amdgpu_fence_emit(ring, &f);
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
mutex_unlock(&kiq->ring_mutex);
spin_unlock(&kiq->ring_lock);
r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
dma_fence_put(f);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
if (r < 1) {
DRM_ERROR("wait for kiq fence error: %ld.\n", r);
DRM_ERROR("wait for kiq fence error: %ld\n", r);
return ~0;
}
val = adev->wb.wb[adev->virt.reg_val_offs];
return val;
@ -143,23 +140,22 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
signed long r;
struct dma_fence *f;
uint32_t seq;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_wreg);
mutex_lock(&kiq->ring_mutex);
spin_lock(&kiq->ring_lock);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
amdgpu_fence_emit(ring, &f);
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
mutex_unlock(&kiq->ring_mutex);
spin_unlock(&kiq->ring_lock);
r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
if (r < 1)
DRM_ERROR("wait for kiq fence error: %ld.\n", r);
dma_fence_put(f);
DRM_ERROR("wait for kiq fence error: %ld\n", r);
}
/**
@ -274,3 +270,78 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
(void *)&adev->virt.mm_table.cpu_addr);
adev->virt.mm_table.gpu_addr = 0;
}
int amdgpu_virt_fw_reserve_get_checksum(void *obj,
unsigned long obj_size,
unsigned int key,
unsigned int chksum)
{
unsigned int ret = key;
unsigned long i = 0;
unsigned char *pos;
pos = (char *)obj;
/* calculate checksum */
for (i = 0; i < obj_size; ++i)
ret += *(pos + i);
/* subtract the chksum bytes themselves */
pos = (char *)&chksum;
for (i = 0; i < sizeof(chksum); ++i)
ret -= *(pos + i);
return ret;
}
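The helper above is a plain byte-wise sum seeded with a shared key: the stored checksum field is itself part of the summed object, so its own bytes are subtracted back out, letting producer and consumer arrive at the same value over the same data. A minimal stand-alone model of that round trip (illustrative only; byte_sum_checksum and demo_blob are hypothetical stand-ins, not kernel symbols):

#include <assert.h>
#include <stdint.h>

/* user-space model of the byte-sum checksum used above */
static unsigned int byte_sum_checksum(const void *obj, unsigned long size,
				      unsigned int key, unsigned int stored)
{
	const unsigned char *p = obj;
	unsigned int ret = key;
	unsigned long i;

	for (i = 0; i < size; ++i)		/* sum every byte of the object */
		ret += p[i];
	p = (const unsigned char *)&stored;	/* cancel the stored checksum's bytes */
	for (i = 0; i < sizeof(stored); ++i)
		ret -= p[i];
	return ret;
}

struct demo_blob {
	uint32_t payload[4];
	uint32_t checksum;
};

int main(void)
{
	struct demo_blob b = { .payload = { 1, 2, 3, 4 } };	/* checksum starts at 0 */
	unsigned int key = 0x1234;

	/* producer: compute over the zeroed field, then store the result */
	b.checksum = byte_sum_checksum(&b, sizeof(b), key, 0);
	/* consumer: passing the stored value removes its contribution again */
	assert(byte_sum_checksum(&b, sizeof(b), key, b.checksum) == b.checksum);
	return 0;
}

This mirrors what amdgpu_virt_init_data_exchange() below does: the vf2pf block is memset to zero before its checksum is computed with a zero chksum argument, while the pf2vf block is verified by passing the checksum the GIM stored in it.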
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
{
uint32_t pf2vf_ver = 0;
uint32_t pf2vf_size = 0;
uint32_t checksum = 0;
uint32_t checkval;
char *str;
adev->virt.fw_reserve.p_pf2vf = NULL;
adev->virt.fw_reserve.p_vf2pf = NULL;
if (adev->fw_vram_usage.va != NULL) {
adev->virt.fw_reserve.p_pf2vf =
(struct amdgim_pf2vf_info_header *)(
adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET);
pf2vf_ver = adev->virt.fw_reserve.p_pf2vf->version;
AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size);
AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum);
/* the pf2vf message must fit within 4K */
if (pf2vf_size > 0 && pf2vf_size < 4096) {
checkval = amdgpu_virt_fw_reserve_get_checksum(
adev->virt.fw_reserve.p_pf2vf, pf2vf_size,
adev->virt.fw_reserve.checksum_key, checksum);
if (checkval == checksum) {
adev->virt.fw_reserve.p_vf2pf =
((void *)adev->virt.fw_reserve.p_pf2vf +
pf2vf_size);
memset((void *)adev->virt.fw_reserve.p_vf2pf, 0,
sizeof(amdgim_vf2pf_info));
AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.version,
AMDGPU_FW_VRAM_VF2PF_VER);
AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.size,
sizeof(amdgim_vf2pf_info));
AMDGPU_FW_VRAM_VF2PF_READ(adev, driver_version,
&str);
if (THIS_MODULE->version != NULL)
strcpy(str, THIS_MODULE->version);
else
strcpy(str, "N/A");
AMDGPU_FW_VRAM_VF2PF_WRITE(adev, driver_cert,
0);
AMDGPU_FW_VRAM_VF2PF_WRITE(adev, checksum,
amdgpu_virt_fw_reserve_get_checksum(
adev->virt.fw_reserve.p_vf2pf,
pf2vf_size,
adev->virt.fw_reserve.checksum_key, 0));
}
}
}
}

View File

@ -58,6 +58,179 @@ struct amdgpu_virt_ops {
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
};
/*
* Firmware Reserve Frame buffer
*/
struct amdgpu_virt_fw_reserve {
struct amdgim_pf2vf_info_header *p_pf2vf;
struct amdgim_vf2pf_info_header *p_vf2pf;
unsigned int checksum_key;
};
/*
* Definitions shared between PF and VF
* Structures forcibly aligned to 4 to keep the same style as PF.
*/
#define AMDGIM_DATAEXCHANGE_OFFSET (64 * 1024)
#define AMDGIM_GET_STRUCTURE_RESERVED_SIZE(total, u8, u16, u32, u64) \
(total - (((u8)+3) / 4 + ((u16)+1) / 2 + (u32) + (u64)*2))
enum AMDGIM_FEATURE_FLAG {
/* GIM supports feature of Error log collecting */
AMDGIM_FEATURE_ERROR_LOG_COLLECT = 0x1,
/* GIM supports feature of loading uCodes */
AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2,
};
struct amdgim_pf2vf_info_header {
/* the total structure size in bytes. */
uint32_t size;
/* version of this structure, written by the GIM */
uint32_t version;
} __aligned(4);
struct amdgim_pf2vf_info_v1 {
/* header contains size and version */
struct amdgim_pf2vf_info_header header;
/* max_width * max_height */
unsigned int uvd_enc_max_pixels_count;
/* 16x16 pixels/sec, codec independent */
unsigned int uvd_enc_max_bandwidth;
/* max_width * max_height */
unsigned int vce_enc_max_pixels_count;
/* 16x16 pixels/sec, codec independent */
unsigned int vce_enc_max_bandwidth;
/* MEC FW position in kb from the start of visible frame buffer */
unsigned int mecfw_kboffset;
/* The features flags of the GIM driver supports. */
unsigned int feature_flags;
/* use the private key from mailbox 2 to create the checksum */
unsigned int checksum;
} __aligned(4);
struct amdgim_pf2vf_info_v2 {
/* header contains size and version */
struct amdgim_pf2vf_info_header header;
/* use the private key from mailbox 2 to create the checksum */
uint32_t checksum;
/* The features flags of the GIM driver supports. */
uint32_t feature_flags;
/* max_width * max_height */
uint32_t uvd_enc_max_pixels_count;
/* 16x16 pixels/sec, codec independent */
uint32_t uvd_enc_max_bandwidth;
/* max_width * max_height */
uint32_t vce_enc_max_pixels_count;
/* 16x16 pixels/sec, codec independent */
uint32_t vce_enc_max_bandwidth;
/* MEC FW position in kb from the start of VF visible frame buffer */
uint64_t mecfw_kboffset;
/* MEC FW size in KB */
uint32_t mecfw_ksize;
/* UVD FW position in kb from the start of VF visible frame buffer */
uint64_t uvdfw_kboffset;
/* UVD FW size in KB */
uint32_t uvdfw_ksize;
/* VCE FW position in kb from the start of VF visible frame buffer */
uint64_t vcefw_kboffset;
/* VCE FW size in KB */
uint32_t vcefw_ksize;
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amdgim_pf2vf_info_header)/sizeof(uint32_t)), 3)];
} __aligned(4);
struct amdgim_vf2pf_info_header {
/* the total structure size in bytes. */
uint32_t size;
/* version of this structure, written by the guest */
uint32_t version;
} __aligned(4);
struct amdgim_vf2pf_info_v1 {
/* header contains size and version */
struct amdgim_vf2pf_info_header header;
/* driver version */
char driver_version[64];
/* driver certification, 1=WHQL, 0=None */
unsigned int driver_cert;
/* guest OS type and version: need a define */
unsigned int os_info;
/* in the unit of 1M */
unsigned int fb_usage;
/* guest gfx engine usage percentage */
unsigned int gfx_usage;
/* guest gfx engine health percentage */
unsigned int gfx_health;
/* guest compute engine usage percentage */
unsigned int compute_usage;
/* guest compute engine health percentage */
unsigned int compute_health;
/* guest vce engine usage percentage. 0xffff means N/A. */
unsigned int vce_enc_usage;
/* guest vce engine health percentage. 0xffff means N/A. */
unsigned int vce_enc_health;
/* guest uvd engine usage percentage. 0xffff means N/A. */
unsigned int uvd_enc_usage;
/* guest uvd engine health percentage. 0xffff means N/A. */
unsigned int uvd_enc_health;
unsigned int checksum;
} __aligned(4);
struct amdgim_vf2pf_info_v2 {
/* header contains size and version */
struct amdgim_vf2pf_info_header header;
uint32_t checksum;
/* driver version */
uint8_t driver_version[64];
/* driver certification, 1=WHQL, 0=None */
uint32_t driver_cert;
/* guest OS type and version: need a define */
uint32_t os_info;
/* in the unit of 1M */
uint32_t fb_usage;
/* guest gfx engine usage percentage */
uint32_t gfx_usage;
/* guest gfx engine health percentage */
uint32_t gfx_health;
/* guest compute engine usage percentage */
uint32_t compute_usage;
/* guest compute engine health percentage */
uint32_t compute_health;
/* guest vce engine usage percentage. 0xffff means N/A. */
uint32_t vce_enc_usage;
/* guest vce engine health percentage. 0xffff means N/A. */
uint32_t vce_enc_health;
/* guest uvd engine usage percentage. 0xffff means N/A. */
uint32_t uvd_enc_usage;
/* guest uvd engine health percentage. 0xffff means N/A. */
uint32_t uvd_enc_health;
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amdgim_vf2pf_info_header)/sizeof(uint32_t)), 0)];
} __aligned(4);
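For reference, the AMDGIM_GET_STRUCTURE_RESERVED_SIZE() uses above evaluate as follows (a worked example, assuming the counts are in dwords and ignoring any ABI padding the compiler may insert around the 64-bit members):

/* amdgim_pf2vf_info_v2:
 *   total = 256, u8 = 0, u16 = 0, u32 = 9 + 2 (header), u64 = 3
 *   reserved = 256 - ((0+3)/4 + (0+1)/2 + 11 + 3*2) = 256 - 17 = 239 dwords
 *   11 + 6 + 239 = 256 dwords, i.e. the structure is nominally padded to 1 KB
 *
 * amdgim_vf2pf_info_v2:
 *   total = 256, u8 = 64 (driver_version), u16 = 0, u32 = 12 + 2 (header), u64 = 0
 *   reserved = 256 - ((64+3)/4 + (0+1)/2 + 14 + 0) = 256 - 30 = 226 dwords
 *   16 + 14 + 226 = 256 dwords, again nominally 1 KB
 */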
#define AMDGPU_FW_VRAM_VF2PF_VER 2
typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info;
#define AMDGPU_FW_VRAM_VF2PF_WRITE(adev, field, val) \
do { \
((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field = (val); \
} while (0)
#define AMDGPU_FW_VRAM_VF2PF_READ(adev, field, val) \
do { \
(*val) = ((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field; \
} while (0)
#define AMDGPU_FW_VRAM_PF2VF_READ(adev, field, val) \
do { \
if (!adev->virt.fw_reserve.p_pf2vf) \
*(val) = 0; \
else { \
if (adev->virt.fw_reserve.p_pf2vf->version == 1) \
*(val) = ((struct amdgim_pf2vf_info_v1 *)adev->virt.fw_reserve.p_pf2vf)->field; \
if (adev->virt.fw_reserve.p_pf2vf->version == 2) \
*(val) = ((struct amdgim_pf2vf_info_v2 *)adev->virt.fw_reserve.p_pf2vf)->field; \
} \
} while (0)
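A short sketch of how these accessors are intended to be used once amdgpu_virt_init_data_exchange() has populated fw_reserve (the wrapper function here is hypothetical; feature_flags and fb_usage are real fields of the structures above):

static void example_fw_reserve_exchange(struct amdgpu_device *adev)
{
	uint32_t flags = 0;

	/* read a PF-provided field; the macro dispatches on the pf2vf version
	 * and falls back to 0 when no pf2vf region was found */
	AMDGPU_FW_VRAM_PF2VF_READ(adev, feature_flags, &flags);

	/* publish a guest-side statistic back to the PF (v2 layout assumed,
	 * and p_vf2pf must already be valid) */
	AMDGPU_FW_VRAM_VF2PF_WRITE(adev, fb_usage, 64);
}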
/* GPU virtualization */
struct amdgpu_virt {
uint32_t caps;
@ -72,6 +245,7 @@ struct amdgpu_virt {
struct amdgpu_mm_table mm_table;
const struct amdgpu_virt_ops *ops;
struct amdgpu_vf_error_buffer vf_errors;
struct amdgpu_virt_fw_reserve fw_reserve;
};
#define AMDGPU_CSA_SIZE (8 * 1024)
@ -114,5 +288,9 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job);
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
unsigned int key,
unsigned int chksum);
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
#endif

View File

@ -328,9 +328,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
AMDGPU_GEM_CREATE_SHADOW);
if (vm->pte_support_ats) {
init_value = AMDGPU_PTE_SYSTEM;
init_value = AMDGPU_PTE_DEFAULT_ATC;
if (level != adev->vm_manager.num_level - 1)
init_value |= AMDGPU_PDE_PTE;
}
/* walk over the address space and allocate the page tables */
@ -1034,7 +1035,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int r;
amdgpu_sync_create(&sync);
amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner);
amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
r = amdgpu_sync_wait(&sync, true);
amdgpu_sync_free(&sync);
@ -1175,11 +1176,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
amdgpu_ring_pad_ib(ring, params.ib);
amdgpu_sync_resv(adev, &job->sync,
parent->base.bo->tbo.resv,
AMDGPU_FENCE_OWNER_VM);
AMDGPU_FENCE_OWNER_VM, false);
if (shadow)
amdgpu_sync_resv(adev, &job->sync,
shadow->tbo.resv,
AMDGPU_FENCE_OWNER_VM);
AMDGPU_FENCE_OWNER_VM, false);
WARN_ON(params.ib->length_dw > ndw);
r = amdgpu_job_submit(job, ring, &vm->entity,
@ -1643,7 +1644,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
goto error_free;
r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
owner);
owner, false);
if (r)
goto error_free;
@ -1698,6 +1699,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
struct drm_mm_node *nodes,
struct dma_fence **fence)
{
unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
uint64_t pfn, start = mapping->start;
int r;
@ -1732,6 +1734,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
}
do {
dma_addr_t *dma_addr = NULL;
uint64_t max_entries;
uint64_t addr, last;
@ -1745,15 +1748,32 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
}
if (pages_addr) {
uint64_t count;
max_entries = min(max_entries, 16ull * 1024ull);
addr = 0;
for (count = 1; count < max_entries; ++count) {
uint64_t idx = pfn + count;
if (pages_addr[idx] !=
(pages_addr[idx - 1] + PAGE_SIZE))
break;
}
if (count < min_linear_pages) {
addr = pfn << PAGE_SHIFT;
dma_addr = pages_addr;
} else {
addr = pages_addr[pfn];
max_entries = count;
}
} else if (flags & AMDGPU_PTE_VALID) {
addr += adev->vm_manager.vram_base_offset;
addr += pfn << PAGE_SHIFT;
}
addr += pfn << PAGE_SHIFT;
last = min((uint64_t)mapping->last, start + max_entries - 1);
r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm,
r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
start, last, flags, addr,
fence);
if (r)
@ -2017,7 +2037,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
list_del(&mapping->list);
if (vm->pte_support_ats)
init_pte_value = AMDGPU_PTE_SYSTEM;
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
mapping->start, mapping->last,
@ -2629,7 +2649,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (adev->asic_type == CHIP_RAVEN) {
vm->pte_support_ats = true;
init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE;
init_pde_value = AMDGPU_PTE_DEFAULT_ATC
| AMDGPU_PDE_PTE;
}
} else
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
@ -2737,8 +2759,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
struct amdgpu_bo_va_mapping *mapping, *tmp;
bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
struct amdgpu_bo *root;
u64 fault;
int i;
int i, r;
/* Clear pending page faults from IH when the VM is destroyed */
while (kfifo_get(&vm->faults, &fault))
@ -2773,7 +2796,15 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
}
amdgpu_vm_free_levels(&vm->root);
root = amdgpu_bo_ref(vm->root.base.bo);
r = amdgpu_bo_reserve(root, true);
if (r) {
dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
} else {
amdgpu_vm_free_levels(&vm->root);
amdgpu_bo_unreserve(root);
}
amdgpu_bo_unref(&root);
dma_fence_put(vm->last_update);
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
amdgpu_vm_free_reserved_vmid(adev, vm, i);

View File

@ -73,6 +73,16 @@ struct amdgpu_bo_list_entry;
#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
/* For Raven */
#define AMDGPU_MTYPE_CC 2
#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \
| AMDGPU_PTE_SNOOPED \
| AMDGPU_PTE_EXECUTABLE \
| AMDGPU_PTE_READABLE \
| AMDGPU_PTE_WRITEABLE \
| AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC))
/* How to program VM fault handling */
#define AMDGPU_VM_FAULT_STOP_NEVER 0
#define AMDGPU_VM_FAULT_STOP_FIRST 1

View File

@ -20,6 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
@ -3952,10 +3953,10 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
adev->gfx.rlc.reg_list_format_size_bytes >> 2,
unique_indices,
&indices_count,
sizeof(unique_indices) / sizeof(int),
ARRAY_SIZE(unique_indices),
indirect_start_offsets,
&offset_count,
sizeof(indirect_start_offsets)/sizeof(int));
ARRAY_SIZE(indirect_start_offsets));
/* save and restore list */
WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
@ -3977,14 +3978,14 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
/* starting offsets starts */
WREG32(mmRLC_GPM_SCRATCH_ADDR,
adev->gfx.rlc.starting_offsets_start);
for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
WREG32(mmRLC_GPM_SCRATCH_DATA,
indirect_start_offsets[i]);
/* unique indices */
temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
data = mmRLC_SRM_INDEX_CNTL_DATA_0;
for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
if (unique_indices[i] != 0) {
WREG32(temp + i, unique_indices[i] & 0x3FFFF);
WREG32(data + i, unique_indices[i] >> 20);
@ -6394,6 +6395,104 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
bool acquire)
{
struct amdgpu_device *adev = ring->adev;
int pipe_num, tmp, reg;
int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
/* first me only has 2 entries, GFX and HP3D */
if (ring->me > 0)
pipe_num -= 2;
reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
tmp = RREG32(reg);
tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
WREG32(reg, tmp);
}
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
bool acquire)
{
int i, pipe;
bool reserve;
struct amdgpu_ring *iring;
mutex_lock(&adev->gfx.pipe_reserve_mutex);
pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
if (acquire)
set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
else
clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
/* Clear all reservations - everyone reacquires all resources */
for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
true);
for (i = 0; i < adev->gfx.num_compute_rings; ++i)
gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
true);
} else {
/* Lower all pipes without a current reservation */
for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
iring = &adev->gfx.gfx_ring[i];
pipe = amdgpu_gfx_queue_to_bit(adev,
iring->me,
iring->pipe,
0);
reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
gfx_v8_0_ring_set_pipe_percent(iring, reserve);
}
for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
iring = &adev->gfx.compute_ring[i];
pipe = amdgpu_gfx_queue_to_bit(adev,
iring->me,
iring->pipe,
0);
reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
gfx_v8_0_ring_set_pipe_percent(iring, reserve);
}
}
mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
bool acquire)
{
uint32_t pipe_priority = acquire ? 0x2 : 0x0;
uint32_t queue_priority = acquire ? 0xf : 0x0;
mutex_lock(&adev->srbm_mutex);
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
enum amd_sched_priority priority)
{
struct amdgpu_device *adev = ring->adev;
bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW;
if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
return;
gfx_v8_0_hqd_set_priority(adev, ring, acquire);
gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}
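Taken together, the compute-priority hook resolves to roughly the following register-level behaviour (a summary of the two helpers above, not additional code in the patch):

/* priority == AMD_SCHED_PRIORITY_HIGH_HW (acquire):
 *   - this queue's CP_HQD_PIPE_PRIORITY = 0x2, CP_HQD_QUEUE_PRIORITY = 0xf
 *   - the pipe is marked in pipe_reserve_bitmap and every gfx/compute pipe
 *     without a reservation has SPI_WCL_PIPE_PERCENT_GFX throttled to 0x1
 * any other priority (release):
 *   - the HQD priorities drop back to 0x0
 *   - the reservation bit is cleared; once the bitmap is empty all pipes
 *     are restored to the full SPI_WCL_PIPE_PERCENT_GFX value
 */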
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
u64 addr, u64 seq,
unsigned flags)
@ -6839,6 +6938,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.set_priority = gfx_v8_0_ring_set_priority_compute,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {

View File

@ -20,6 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
@ -1730,10 +1731,10 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
adev->gfx.rlc.reg_list_format_size_bytes >> 2,
unique_indirect_regs,
&unique_indirect_reg_count,
sizeof(unique_indirect_regs)/sizeof(int),
ARRAY_SIZE(unique_indirect_regs),
indirect_start_offsets,
&indirect_start_offsets_count,
sizeof(indirect_start_offsets)/sizeof(int));
ARRAY_SIZE(indirect_start_offsets));
/* enable auto inc in case it is disabled */
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
@ -1770,12 +1771,12 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
/* write the starting offsets to RLC scratch ram */
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
adev->gfx.rlc.starting_offsets_start);
for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
indirect_start_offsets[i]);
/* load unique indirect regs*/
for (i = 0; i < sizeof(unique_indirect_regs)/sizeof(int); i++) {
for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
unique_indirect_regs[i] & 0x3FFFF);
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,

View File

@ -183,6 +183,12 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
return r;
}
/* Retrieve checksum from mailbox2 */
if (req == IDH_REQ_GPU_INIT_ACCESS) {
adev->virt.fw_reserve.checksum_key =
RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2));
}
}
return 0;

View File

@ -279,10 +279,7 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev)
}
static u32 soc15_get_xclk(struct amdgpu_device *adev)
{
if (adev->asic_type == CHIP_VEGA10)
return adev->clock.spll.reference_freq/4;
else
return adev->clock.spll.reference_freq;
return adev->clock.spll.reference_freq;
}

View File

@ -268,8 +268,9 @@ err:
*
* Close up a stream for HW test or if userspace failed to do so
*/
int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence)
static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
uint32_t handle,
bool direct, struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;

View File

@ -961,18 +961,13 @@ static void cz_clear_voting_clients(struct pp_hwmgr *hwmgr)
static int cz_start_dpm(struct pp_hwmgr *hwmgr)
{
int ret = 0;
struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend);
unsigned long dpm_features = 0;
cz_hwmgr->dpm_flags |= DPMFlags_SCLK_Enabled;
dpm_features |= SCLK_DPM_MASK;
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
return smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_EnableAllSmuFeatures,
dpm_features);
return ret;
SCLK_DPM_MASK);
}
static int cz_stop_dpm(struct pp_hwmgr *hwmgr)
@ -1279,27 +1274,18 @@ static int cz_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
int cz_dpm_powerdown_uvd(struct pp_hwmgr *hwmgr)
{
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_UVDPowerGating))
return smum_send_msg_to_smc(hwmgr,
PPSMC_MSG_UVDPowerOFF);
if (PP_CAP(PHM_PlatformCaps_UVDPowerGating))
return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UVDPowerOFF);
return 0;
}
int cz_dpm_powerup_uvd(struct pp_hwmgr *hwmgr)
{
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_UVDPowerGating)) {
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_UVDDynamicPowerGating)) {
return smum_send_msg_to_smc_with_parameter(
hwmgr,
PPSMC_MSG_UVDPowerON, 1);
} else {
return smum_send_msg_to_smc_with_parameter(
hwmgr,
PPSMC_MSG_UVDPowerON, 0);
}
if (PP_CAP(PHM_PlatformCaps_UVDPowerGating)) {
return smum_send_msg_to_smc_with_parameter(
hwmgr,
PPSMC_MSG_UVDPowerON,
PP_CAP(PHM_PlatformCaps_UVDDynamicPowerGating) ? 1 : 0);
}
return 0;
@ -1313,17 +1299,16 @@ int cz_dpm_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate)
if (!bgate) {
/* Stable Pstate is enabled and we need to set the UVD DPM to highest level */
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_StablePState)
|| hwmgr->en_umd_pstate) {
if (PP_CAP(PHM_PlatformCaps_StablePState) ||
hwmgr->en_umd_pstate) {
cz_hwmgr->uvd_dpm.hard_min_clk =
ptable->entries[ptable->count - 1].vclk;
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetUvdHardMin,
cz_get_uvd_level(hwmgr,
cz_hwmgr->uvd_dpm.hard_min_clk,
PPSMC_MSG_SetUvdHardMin));
PPSMC_MSG_SetUvdHardMin,
cz_get_uvd_level(hwmgr,
cz_hwmgr->uvd_dpm.hard_min_clk,
PPSMC_MSG_SetUvdHardMin));
cz_enable_disable_uvd_dpm(hwmgr, true);
} else {
@ -1343,17 +1328,16 @@ int cz_dpm_update_vce_dpm(struct pp_hwmgr *hwmgr)
hwmgr->dyn_state.vce_clock_voltage_dependency_table;
/* Stable Pstate is enabled and we need to set the VCE DPM to highest level */
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_StablePState)
|| hwmgr->en_umd_pstate) {
if (PP_CAP(PHM_PlatformCaps_StablePState) ||
hwmgr->en_umd_pstate) {
cz_hwmgr->vce_dpm.hard_min_clk =
ptable->entries[ptable->count - 1].ecclk;
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetEclkHardMin,
cz_get_eclk_level(hwmgr,
cz_hwmgr->vce_dpm.hard_min_clk,
PPSMC_MSG_SetEclkHardMin));
PPSMC_MSG_SetEclkHardMin,
cz_get_eclk_level(hwmgr,
cz_hwmgr->vce_dpm.hard_min_clk,
PPSMC_MSG_SetEclkHardMin));
} else {
/*Program HardMin based on the vce_arbiter.ecclk */
if (hwmgr->vce_arbiter.ecclk == 0) {
@ -1366,10 +1350,10 @@ int cz_dpm_update_vce_dpm(struct pp_hwmgr *hwmgr)
} else {
cz_hwmgr->vce_dpm.hard_min_clk = hwmgr->vce_arbiter.ecclk;
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetEclkHardMin,
cz_get_eclk_level(hwmgr,
cz_hwmgr->vce_dpm.hard_min_clk,
PPSMC_MSG_SetEclkHardMin));
PPSMC_MSG_SetEclkHardMin,
cz_get_eclk_level(hwmgr,
cz_hwmgr->vce_dpm.hard_min_clk,
PPSMC_MSG_SetEclkHardMin));
}
}
return 0;
@ -1377,8 +1361,7 @@ int cz_dpm_update_vce_dpm(struct pp_hwmgr *hwmgr)
int cz_dpm_powerdown_vce(struct pp_hwmgr *hwmgr)
{
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_VCEPowerGating))
if (PP_CAP(PHM_PlatformCaps_VCEPowerGating))
return smum_send_msg_to_smc(hwmgr,
PPSMC_MSG_VCEPowerOFF);
return 0;
@ -1386,8 +1369,7 @@ int cz_dpm_powerdown_vce(struct pp_hwmgr *hwmgr)
int cz_dpm_powerup_vce(struct pp_hwmgr *hwmgr)
{
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_VCEPowerGating))
if (PP_CAP(PHM_PlatformCaps_VCEPowerGating))
return smum_send_msg_to_smc(hwmgr,
PPSMC_MSG_VCEPowerON);
return 0;
@ -1871,6 +1853,33 @@ static int cz_read_sensor(struct pp_hwmgr *hwmgr, int idx,
}
}
static int cz_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
uint32_t virtual_addr_low,
uint32_t virtual_addr_hi,
uint32_t mc_addr_low,
uint32_t mc_addr_hi,
uint32_t size)
{
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramAddrHiVirtual,
mc_addr_hi);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramAddrLoVirtual,
mc_addr_low);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramAddrHiPhysical,
virtual_addr_hi);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramAddrLoPhysical,
virtual_addr_low);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramBufferSize,
size);
return 0;
}
static const struct pp_hwmgr_func cz_hwmgr_funcs = {
.backend_init = cz_hwmgr_backend_init,
.backend_fini = cz_hwmgr_backend_fini,
@ -1894,12 +1903,14 @@ static const struct pp_hwmgr_func cz_hwmgr_funcs = {
.get_current_shallow_sleep_clocks = cz_get_current_shallow_sleep_clocks,
.get_clock_by_type = cz_get_clock_by_type,
.get_max_high_clocks = cz_get_max_high_clocks,
.get_temperature = cz_thermal_get_temperature,
.read_sensor = cz_read_sensor,
.power_off_asic = cz_power_off_asic,
.asic_setup = cz_setup_asic_task,
.dynamic_state_management_enable = cz_enable_dpm_tasks,
.power_state_set = cz_set_power_state_tasks,
.dynamic_state_management_disable = cz_disable_dpm_tasks,
.notify_cac_buffer_info = cz_notify_cac_buffer_info,
};
int cz_init_function_pointers(struct pp_hwmgr *hwmgr)

View File

@ -4645,6 +4645,47 @@ static int smu7_avfs_control(struct pp_hwmgr *hwmgr, bool enable)
return 0;
}
static int smu7_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
uint32_t virtual_addr_low,
uint32_t virtual_addr_hi,
uint32_t mc_addr_low,
uint32_t mc_addr_hi,
uint32_t size)
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
data->soft_regs_start +
smum_get_offsetof(hwmgr,
SMU_SoftRegisters, DRAM_LOG_ADDR_H),
mc_addr_hi);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
data->soft_regs_start +
smum_get_offsetof(hwmgr,
SMU_SoftRegisters, DRAM_LOG_ADDR_L),
mc_addr_low);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
data->soft_regs_start +
smum_get_offsetof(hwmgr,
SMU_SoftRegisters, DRAM_LOG_PHY_ADDR_H),
virtual_addr_hi);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
data->soft_regs_start +
smum_get_offsetof(hwmgr,
SMU_SoftRegisters, DRAM_LOG_PHY_ADDR_L),
virtual_addr_low);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
data->soft_regs_start +
smum_get_offsetof(hwmgr,
SMU_SoftRegisters, DRAM_LOG_BUFF_SIZE),
size);
return 0;
}
static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
.backend_init = &smu7_hwmgr_backend_init,
.backend_fini = &smu7_hwmgr_backend_fini,
@ -4696,6 +4737,7 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
.avfs_control = smu7_avfs_control,
.disable_smc_firmware_ctf = smu7_thermal_disable_alert,
.start_thermal_controller = smu7_start_thermal_controller,
.notify_cac_buffer_info = smu7_notify_cac_buffer_info,
};
uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock,

View File

@ -1161,6 +1161,8 @@ static void vega10_setup_default_single_dpm_table(struct pp_hwmgr *hwmgr,
{
int i;
dpm_table->count = 0;
for (i = 0; i < dep_table->count; i++) {
if (i == 0 || dpm_table->dpm_levels[dpm_table->count - 1].value <=
dep_table->entries[i].clk) {
@ -1269,10 +1271,6 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
return -EINVAL);
/* Initialize Sclk DPM table based on allow Sclk values */
data->dpm_table.soc_table.count = 0;
data->dpm_table.gfx_table.count = 0;
data->dpm_table.dcef_table.count = 0;
dpm_table = &(data->dpm_table.soc_table);
vega10_setup_default_single_dpm_table(hwmgr,
dpm_table,
@ -4994,6 +4992,33 @@ static int vega10_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
return 0;
}
static int vega10_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
uint32_t virtual_addr_low,
uint32_t virtual_addr_hi,
uint32_t mc_addr_low,
uint32_t mc_addr_hi,
uint32_t size)
{
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSystemVirtualDramAddrHigh,
virtual_addr_hi);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSystemVirtualDramAddrLow,
virtual_addr_low);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramLogSetDramAddrHigh,
mc_addr_hi);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramLogSetDramAddrLow,
mc_addr_low);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_DramLogSetDramSize,
size);
return 0;
}
static int vega10_register_thermal_interrupt(struct pp_hwmgr *hwmgr,
const void *info)
{
@ -5079,7 +5104,9 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
.get_mclk_od = vega10_get_mclk_od,
.set_mclk_od = vega10_set_mclk_od,
.avfs_control = vega10_avfs_enable,
.notify_cac_buffer_info = vega10_notify_cac_buffer_info,
.register_internal_thermal_interrupt = vega10_register_thermal_interrupt,
.start_thermal_controller = vega10_start_thermal_controller,
};
int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)

View File

@ -291,8 +291,7 @@ static int get_mm_clock_voltage_table(
table_size = sizeof(uint32_t) +
sizeof(phm_ppt_v1_mm_clock_voltage_dependency_record) *
mm_dependency_table->ucNumEntries;
mm_table = (phm_ppt_v1_mm_clock_voltage_dependency_table *)
kzalloc(table_size, GFP_KERNEL);
mm_table = kzalloc(table_size, GFP_KERNEL);
if (!mm_table)
return -ENOMEM;
@ -519,8 +518,7 @@ static int get_socclk_voltage_dependency_table(
sizeof(phm_ppt_v1_clock_voltage_dependency_record) *
clk_dep_table->ucNumEntries;
clk_table = (phm_ppt_v1_clock_voltage_dependency_table *)
kzalloc(table_size, GFP_KERNEL);
clk_table = kzalloc(table_size, GFP_KERNEL);
if (!clk_table)
return -ENOMEM;
@ -554,8 +552,7 @@ static int get_mclk_voltage_dependency_table(
sizeof(phm_ppt_v1_clock_voltage_dependency_record) *
mclk_dep_table->ucNumEntries;
mclk_table = (phm_ppt_v1_clock_voltage_dependency_table *)
kzalloc(table_size, GFP_KERNEL);
mclk_table = kzalloc(table_size, GFP_KERNEL);
if (!mclk_table)
return -ENOMEM;
@ -596,8 +593,7 @@ static int get_gfxclk_voltage_dependency_table(
sizeof(phm_ppt_v1_clock_voltage_dependency_record) *
clk_dep_table->ucNumEntries;
clk_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)
kzalloc(table_size, GFP_KERNEL);
clk_table = kzalloc(table_size, GFP_KERNEL);
if (!clk_table)
return -ENOMEM;
@ -663,8 +659,7 @@ static int get_pix_clk_voltage_dependency_table(
sizeof(phm_ppt_v1_clock_voltage_dependency_record) *
clk_dep_table->ucNumEntries;
clk_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)
kzalloc(table_size, GFP_KERNEL);
clk_table = kzalloc(table_size, GFP_KERNEL);
if (!clk_table)
return -ENOMEM;
@ -728,8 +723,7 @@ static int get_dcefclk_voltage_dependency_table(
sizeof(phm_ppt_v1_clock_voltage_dependency_record) *
num_entries;
clk_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)
kzalloc(table_size, GFP_KERNEL);
clk_table = kzalloc(table_size, GFP_KERNEL);
if (!clk_table)
return -ENOMEM;
@ -772,8 +766,7 @@ static int get_pcie_table(struct pp_hwmgr *hwmgr,
sizeof(struct phm_ppt_v1_pcie_record) *
atom_pcie_table->ucNumEntries;
pcie_table = (struct phm_ppt_v1_pcie_table *)
kzalloc(table_size, GFP_KERNEL);
pcie_table = kzalloc(table_size, GFP_KERNEL);
if (!pcie_table)
return -ENOMEM;
@ -1026,10 +1019,9 @@ static int get_vddc_lookup_table(
table_size = sizeof(uint32_t) +
sizeof(phm_ppt_v1_voltage_lookup_record) * max_levels;
table = (phm_ppt_v1_voltage_lookup_table *)
kzalloc(table_size, GFP_KERNEL);
table = kzalloc(table_size, GFP_KERNEL);
if (NULL == table)
if (table == NULL)
return -ENOMEM;
table->count = vddc_lookup_pp_tables->ucNumEntries;
@ -1138,12 +1130,12 @@ int vega10_pp_tables_initialize(struct pp_hwmgr *hwmgr)
hwmgr->pptable = kzalloc(sizeof(struct phm_ppt_v2_information), GFP_KERNEL);
PP_ASSERT_WITH_CODE((NULL != hwmgr->pptable),
PP_ASSERT_WITH_CODE((hwmgr->pptable != NULL),
"Failed to allocate hwmgr->pptable!", return -ENOMEM);
powerplay_table = get_powerplay_table(hwmgr);
PP_ASSERT_WITH_CODE((NULL != powerplay_table),
PP_ASSERT_WITH_CODE((powerplay_table != NULL),
"Missing PowerPlay Table!", return -1);
result = check_powerplay_tables(hwmgr, powerplay_table);
@ -1182,7 +1174,6 @@ int vega10_pp_tables_initialize(struct pp_hwmgr *hwmgr)
static int vega10_pp_tables_uninitialize(struct pp_hwmgr *hwmgr)
{
int result = 0;
struct phm_ppt_v2_information *pp_table_info =
(struct phm_ppt_v2_information *)(hwmgr->pptable);
@ -1225,7 +1216,7 @@ static int vega10_pp_tables_uninitialize(struct pp_hwmgr *hwmgr)
kfree(hwmgr->pptable);
hwmgr->pptable = NULL;
return result;
return 0;
}
const struct pp_table_func vega10_pptable_funcs = {
@ -1238,7 +1229,7 @@ int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr)
const ATOM_Vega10_State_Array *state_arrays;
const ATOM_Vega10_POWERPLAYTABLE *pp_table = get_powerplay_table(hwmgr);
PP_ASSERT_WITH_CODE((NULL != pp_table),
PP_ASSERT_WITH_CODE((pp_table != NULL),
"Missing PowerPlay Table!", return -1);
PP_ASSERT_WITH_CODE((pp_table->sHeader.format_revision >=
ATOM_Vega10_TABLE_REVISION_VEGA10),

View File

@ -71,7 +71,8 @@ extern int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr,
extern int vega10_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr);
extern int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr);
extern int vega10_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr);
extern int vega10_start_thermal_controller(struct pp_hwmgr *hwmgr,
struct PP_TemperatureRange *range);
extern uint32_t smu7_get_xclk(struct pp_hwmgr *hwmgr);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -363,6 +363,12 @@ struct pp_hwmgr_func {
int (*set_active_display_count)(struct pp_hwmgr *hwmgr, uint32_t count);
int (*set_deep_sleep_dcefclk)(struct pp_hwmgr *hwmgr, uint32_t clock);
int (*start_thermal_controller)(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *range);
int (*notify_cac_buffer_info)(struct pp_hwmgr *hwmgr,
uint32_t virtual_addr_low,
uint32_t virtual_addr_hi,
uint32_t mc_addr_low,
uint32_t mc_addr_hi,
uint32_t size);
};
struct pp_table_func {

View File

@ -75,6 +75,11 @@ enum SMU_MEMBER {
VceBootLevel,
SamuBootLevel,
LowSclkInterruptThreshold,
DRAM_LOG_ADDR_H,
DRAM_LOG_ADDR_L,
DRAM_LOG_PHY_ADDR_H,
DRAM_LOG_PHY_ADDR_L,
DRAM_LOG_BUFF_SIZE,
};

View File

@ -124,6 +124,8 @@ typedef uint16_t PPSMC_Result;
#define PPSMC_MSG_NumOfDisplays 0x56
#define PPSMC_MSG_ReadSerialNumTop32 0x58
#define PPSMC_MSG_ReadSerialNumBottom32 0x59
#define PPSMC_MSG_SetSystemVirtualDramAddrHigh 0x5A
#define PPSMC_MSG_SetSystemVirtualDramAddrLow 0x5B
#define PPSMC_MSG_RunAcgBtc 0x5C
#define PPSMC_MSG_RunAcgInClosedLoop 0x5D
#define PPSMC_MSG_RunAcgInOpenLoop 0x5E

View File

@ -2,9 +2,9 @@
# Makefile for the 'smu manager' sub-component of powerplay.
# It provides the smu management services for the driver.
SMU_MGR = smumgr.o cz_smumgr.o tonga_smumgr.o fiji_smumgr.o fiji_smc.o \
polaris10_smumgr.o iceland_smumgr.o polaris10_smc.o tonga_smc.o \
smu7_smumgr.o iceland_smc.o vega10_smumgr.o rv_smumgr.o ci_smc.o
SMU_MGR = smumgr.o cz_smumgr.o tonga_smumgr.o fiji_smumgr.o \
polaris10_smumgr.o iceland_smumgr.o \
smu7_smumgr.o vega10_smumgr.o rv_smumgr.o ci_smumgr.o
AMD_PP_SMUMGR = $(addprefix $(AMD_PP_PATH)/smumgr/,$(SMU_MGR))

View File

@ -2266,6 +2266,16 @@ static uint32_t ci_get_offsetof(uint32_t type, uint32_t member)
return offsetof(SMU7_SoftRegisters, PreVBlankGap);
case VBlankTimeout:
return offsetof(SMU7_SoftRegisters, VBlankTimeout);
case DRAM_LOG_ADDR_H:
return offsetof(SMU7_SoftRegisters, DRAM_LOG_ADDR_H);
case DRAM_LOG_ADDR_L:
return offsetof(SMU7_SoftRegisters, DRAM_LOG_ADDR_L);
case DRAM_LOG_PHY_ADDR_H:
return offsetof(SMU7_SoftRegisters, DRAM_LOG_PHY_ADDR_H);
case DRAM_LOG_PHY_ADDR_L:
return offsetof(SMU7_SoftRegisters, DRAM_LOG_PHY_ADDR_L);
case DRAM_LOG_BUFF_SIZE:
return offsetof(SMU7_SoftRegisters, DRAM_LOG_BUFF_SIZE);
}
case SMU_Discrete_DpmTable:
switch (member) {

File diff suppressed because it is too large Load Diff

View File

@ -1,53 +0,0 @@
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef FIJI_SMC_H
#define FIJI_SMC_H
#include "smumgr.h"
#include "smu73.h"
struct fiji_pt_defaults {
uint8_t SviLoadLineEn;
uint8_t SviLoadLineVddC;
uint8_t TDC_VDDC_ThrottleReleaseLimitPerc;
uint8_t TDC_MAWt;
uint8_t TdcWaterfallCtl;
uint8_t DTEAmbientTempBase;
};
int fiji_populate_all_graphic_levels(struct pp_hwmgr *hwmgr);
int fiji_populate_all_memory_levels(struct pp_hwmgr *hwmgr);
int fiji_init_smc_table(struct pp_hwmgr *hwmgr);
int fiji_thermal_setup_fan_table(struct pp_hwmgr *hwmgr);
int fiji_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type);
int fiji_update_sclk_threshold(struct pp_hwmgr *hwmgr);
uint32_t fiji_get_offsetof(uint32_t type, uint32_t member);
uint32_t fiji_get_mac_definition(uint32_t value);
int fiji_process_firmware_header(struct pp_hwmgr *hwmgr);
int fiji_initialize_mc_reg_table(struct pp_hwmgr *hwmgr);
bool fiji_is_dpm_running(struct pp_hwmgr *hwmgr);
int fiji_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr,
struct amd_pp_profile *request);
int fiji_thermal_avfs_enable(struct pp_hwmgr *hwmgr);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -28,6 +28,15 @@
#include "smu7_smumgr.h"
struct fiji_pt_defaults {
uint8_t SviLoadLineEn;
uint8_t SviLoadLineVddC;
uint8_t TDC_VDDC_ThrottleReleaseLimitPerc;
uint8_t TDC_MAWt;
uint8_t TdcWaterfallCtl;
uint8_t DTEAmbientTempBase;
};
struct fiji_smumgr {
struct smu7_smumgr smu7_data;
struct SMU73_Discrete_DpmTable smc_state_table;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,44 +0,0 @@
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef POLARIS10_SMC_H
#define POLARIS10_SMC_H
#include "smumgr.h"
int polaris10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr);
int polaris10_populate_all_memory_levels(struct pp_hwmgr *hwmgr);
int polaris10_init_smc_table(struct pp_hwmgr *hwmgr);
int polaris10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr);
int polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr);
int polaris10_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type);
int polaris10_update_sclk_threshold(struct pp_hwmgr *hwmgr);
uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member);
uint32_t polaris10_get_mac_definition(uint32_t value);
int polaris10_process_firmware_header(struct pp_hwmgr *hwmgr);
bool polaris10_is_dpm_running(struct pp_hwmgr *hwmgr);
int polaris10_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr,
struct amd_pp_profile *request);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -25,12 +25,13 @@
#include "pp_debug.h"
#include "smumgr.h"
#include "smu_ucode_xfer_vi.h"
#include "smu/smu_7_1_3_d.h"
#include "smu/smu_7_1_3_sh_mask.h"
#include "ppatomctrl.h"
#include "cgs_common.h"
#include "smu7_ppsmc.h"
#include "smu7_smumgr.h"
#include "smu7_common.h"
#include "polaris10_pwrvirus.h"
#define SMU7_SMC_SIZE 0x20000
@ -540,6 +541,47 @@ int smu7_upload_smu_firmware_image(struct pp_hwmgr *hwmgr)
return result;
}
static void execute_pwr_table(struct pp_hwmgr *hwmgr, const PWR_Command_Table *pvirus, int size)
{
int i;
uint32_t reg, data;
for (i = 0; i < size; i++) {
reg = pvirus->reg;
data = pvirus->data;
if (reg != 0xffffffff)
cgs_write_register(hwmgr->device, reg, data);
else
break;
pvirus++;
}
}
static void execute_pwr_dfy_table(struct pp_hwmgr *hwmgr, const PWR_DFY_Section *section)
{
int i;
cgs_write_register(hwmgr->device, mmCP_DFY_CNTL, section->dfy_cntl);
cgs_write_register(hwmgr->device, mmCP_DFY_ADDR_HI, section->dfy_addr_hi);
cgs_write_register(hwmgr->device, mmCP_DFY_ADDR_LO, section->dfy_addr_lo);
for (i = 0; i < section->dfy_size; i++)
cgs_write_register(hwmgr->device, mmCP_DFY_DATA_0, section->dfy_data[i]);
}
int smu7_setup_pwr_virus(struct pp_hwmgr *hwmgr)
{
execute_pwr_table(hwmgr, pwr_virus_table_pre, ARRAY_SIZE(pwr_virus_table_pre));
execute_pwr_dfy_table(hwmgr, &pwr_virus_section1);
execute_pwr_dfy_table(hwmgr, &pwr_virus_section2);
execute_pwr_dfy_table(hwmgr, &pwr_virus_section3);
execute_pwr_dfy_table(hwmgr, &pwr_virus_section4);
execute_pwr_dfy_table(hwmgr, &pwr_virus_section5);
execute_pwr_dfy_table(hwmgr, &pwr_virus_section6);
execute_pwr_table(hwmgr, pwr_virus_table_post, ARRAY_SIZE(pwr_virus_table_post));
return 0;
}
int smu7_init(struct pp_hwmgr *hwmgr)
{
struct smu7_smumgr *smu_data;

View File

@ -88,4 +88,6 @@ int smu7_upload_smu_firmware_image(struct pp_hwmgr *hwmgr);
int smu7_init(struct pp_hwmgr *hwmgr);
int smu7_smu_fini(struct pp_hwmgr *hwmgr);
#endif
int smu7_setup_pwr_virus(struct pp_hwmgr *hwmgr);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,62 +0,0 @@
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef _TONGA_SMC_H
#define _TONGA_SMC_H
#include "smumgr.h"
#include "smu72.h"
#define ASICID_IS_TONGA_P(wDID, bRID) \
(((wDID == 0x6930) && ((bRID == 0xF0) || (bRID == 0xF1) || (bRID == 0xFF))) \
|| ((wDID == 0x6920) && ((bRID == 0) || (bRID == 1))))
struct tonga_pt_defaults {
uint8_t svi_load_line_en;
uint8_t svi_load_line_vddC;
uint8_t tdc_vddc_throttle_release_limit_perc;
uint8_t tdc_mawt;
uint8_t tdc_waterfall_ctl;
uint8_t dte_ambient_temp_base;
uint32_t display_cac;
uint32_t bapm_temp_gradient;
uint16_t bapmti_r[SMU72_DTE_ITERATIONS * SMU72_DTE_SOURCES * SMU72_DTE_SINKS];
uint16_t bapmti_rc[SMU72_DTE_ITERATIONS * SMU72_DTE_SOURCES * SMU72_DTE_SINKS];
};
int tonga_populate_all_graphic_levels(struct pp_hwmgr *hwmgr);
int tonga_populate_all_memory_levels(struct pp_hwmgr *hwmgr);
int tonga_init_smc_table(struct pp_hwmgr *hwmgr);
int tonga_thermal_setup_fan_table(struct pp_hwmgr *hwmgr);
int tonga_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type);
int tonga_update_sclk_threshold(struct pp_hwmgr *hwmgr);
uint32_t tonga_get_offsetof(uint32_t type, uint32_t member);
uint32_t tonga_get_mac_definition(uint32_t value);
int tonga_process_firmware_header(struct pp_hwmgr *hwmgr);
int tonga_initialize_mc_reg_table(struct pp_hwmgr *hwmgr);
bool tonga_is_dpm_running(struct pp_hwmgr *hwmgr);
int tonga_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr,
struct amd_pp_profile *request);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -25,8 +25,26 @@
#define _TONGA_SMUMGR_H_
#include "smu72_discrete.h"
#include "smu7_smumgr.h"
#include "smu72.h"
#define ASICID_IS_TONGA_P(wDID, bRID) \
(((wDID == 0x6930) && ((bRID == 0xF0) || (bRID == 0xF1) || (bRID == 0xFF))) \
|| ((wDID == 0x6920) && ((bRID == 0) || (bRID == 1))))
struct tonga_pt_defaults {
uint8_t svi_load_line_en;
uint8_t svi_load_line_vddC;
uint8_t tdc_vddc_throttle_release_limit_perc;
uint8_t tdc_mawt;
uint8_t tdc_waterfall_ctl;
uint8_t dte_ambient_temp_base;
uint32_t display_cac;
uint32_t bapm_temp_gradient;
uint16_t bapmti_r[SMU72_DTE_ITERATIONS * SMU72_DTE_SOURCES * SMU72_DTE_SINKS];
uint16_t bapmti_rc[SMU72_DTE_ITERATIONS * SMU72_DTE_SOURCES * SMU72_DTE_SINKS];
};
struct tonga_mc_reg_entry {
uint32_t mclk_max;

View File

@ -133,6 +133,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
entity->rq = rq;
entity->sched = sched;
spin_lock_init(&entity->rq_lock);
spin_lock_init(&entity->queue_lock);
r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
if (r)
@ -204,7 +205,6 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)
void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity)
{
struct amd_sched_rq *rq = entity->rq;
int r;
if (!amd_sched_entity_is_initialized(sched, entity))
@ -218,7 +218,7 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
else
r = wait_event_killable(sched->job_scheduled,
amd_sched_entity_is_idle(entity));
amd_sched_rq_remove_entity(rq, entity);
amd_sched_entity_set_rq(entity, NULL);
if (r) {
struct amd_sched_job *job;
@ -257,6 +257,24 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb
dma_fence_put(f);
}
void amd_sched_entity_set_rq(struct amd_sched_entity *entity,
struct amd_sched_rq *rq)
{
if (entity->rq == rq)
return;
spin_lock(&entity->rq_lock);
if (entity->rq)
amd_sched_rq_remove_entity(entity->rq, entity);
entity->rq = rq;
if (rq)
amd_sched_rq_add_entity(rq, entity);
spin_unlock(&entity->rq_lock);
}
bool amd_sched_dependency_optimized(struct dma_fence* fence,
struct amd_sched_entity *entity)
{
@ -354,7 +372,9 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
/* first job wakes up scheduler */
if (first) {
/* Add the entity to the run queue */
spin_lock(&entity->rq_lock);
amd_sched_rq_add_entity(entity->rq, entity);
spin_unlock(&entity->rq_lock);
amd_sched_wakeup(sched);
}
return added;
@ -386,6 +406,7 @@ static void amd_sched_job_finish(struct work_struct *work)
schedule_delayed_work(&next->work_tdr, sched->timeout);
}
spin_unlock(&sched->job_list_lock);
dma_fence_put(&s_job->s_fence->finished);
sched->ops->free_job(s_job);
}
@ -566,6 +587,7 @@ static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
container_of(cb, struct amd_sched_fence, cb);
struct amd_gpu_scheduler *sched = s_fence->sched;
dma_fence_get(&s_fence->finished);
atomic_dec(&sched->hw_rq_count);
amd_sched_fence_finished(s_fence);
@ -618,9 +640,6 @@ static int amd_sched_main(void *param)
fence = sched->ops->run_job(sched_job);
amd_sched_fence_scheduled(s_fence);
/* amd_sched_process_job drops the job's reference of the fence. */
sched_job->s_fence = NULL;
if (fence) {
s_fence->parent = dma_fence_get(fence);
r = dma_fence_add_callback(fence, &s_fence->cb,

View File

@ -39,6 +39,7 @@ struct amd_sched_rq;
struct amd_sched_entity {
struct list_head list;
struct amd_sched_rq *rq;
spinlock_t rq_lock;
struct amd_gpu_scheduler *sched;
spinlock_t queue_lock;
@ -115,9 +116,14 @@ struct amd_sched_backend_ops {
enum amd_sched_priority {
AMD_SCHED_PRIORITY_MIN,
AMD_SCHED_PRIORITY_NORMAL = AMD_SCHED_PRIORITY_MIN,
AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN,
AMD_SCHED_PRIORITY_NORMAL,
AMD_SCHED_PRIORITY_HIGH_SW,
AMD_SCHED_PRIORITY_HIGH_HW,
AMD_SCHED_PRIORITY_KERNEL,
AMD_SCHED_PRIORITY_MAX
AMD_SCHED_PRIORITY_MAX,
AMD_SCHED_PRIORITY_INVALID = -1,
AMD_SCHED_PRIORITY_UNSET = -2
};
/**
@ -150,6 +156,8 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity);
void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
void amd_sched_entity_set_rq(struct amd_sched_entity *entity,
struct amd_sched_rq *rq);
int amd_sched_fence_slab_init(void);
void amd_sched_fence_slab_fini(void);
@ -167,4 +175,11 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
bool amd_sched_dependency_optimized(struct dma_fence* fence,
struct amd_sched_entity *entity);
void amd_sched_job_kickout(struct amd_sched_job *s_job);
static inline enum amd_sched_priority
amd_sched_get_job_priority(struct amd_sched_job *job)
{
return (job->s_entity->rq - job->sched->sched_rq);
}
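The pointer subtraction above only makes sense because the scheduler keeps one run queue per priority level in a contiguous array, so an entity's rq pointer minus the array base is exactly its priority index. A sketch of the assumed layout (struct amd_gpu_scheduler itself is not shown in this hunk):

/* assumed, for illustration:
 *
 *   struct amd_gpu_scheduler {
 *           ...
 *           struct amd_sched_rq sched_rq[AMD_SCHED_PRIORITY_MAX];
 *           ...
 *   };
 *
 * a job whose entity sits on sched_rq[AMD_SCHED_PRIORITY_HIGH_HW] therefore
 * returns AMD_SCHED_PRIORITY_HIGH_HW from amd_sched_get_job_priority().
 */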
#endif

View File

@ -95,7 +95,7 @@ struct ttm_pool_opts {
unsigned small;
};
#define NUM_POOLS 4
#define NUM_POOLS 6
/**
* struct ttm_pool_manager - Holds memory pools for fast allocation
@ -122,6 +122,8 @@ struct ttm_pool_manager {
struct ttm_page_pool uc_pool;
struct ttm_page_pool wc_pool_dma32;
struct ttm_page_pool uc_pool_dma32;
struct ttm_page_pool wc_pool_huge;
struct ttm_page_pool uc_pool_huge;
} ;
};
};
@ -256,8 +258,8 @@ static int set_pages_array_uc(struct page **pages, int addrinarray)
/**
* Select the right pool for the requested caching state and ttm flags. */
static struct ttm_page_pool *ttm_get_pool(int flags,
enum ttm_caching_state cstate)
static struct ttm_page_pool *ttm_get_pool(int flags, bool huge,
enum ttm_caching_state cstate)
{
int pool_index;
@ -269,9 +271,15 @@ static struct ttm_page_pool *ttm_get_pool(int flags,
else
pool_index = 0x1;
if (flags & TTM_PAGE_FLAG_DMA32)
if (flags & TTM_PAGE_FLAG_DMA32) {
if (huge)
return NULL;
pool_index |= 0x2;
} else if (huge) {
pool_index |= 0x4;
}
return &_manager->pools[pool_index];
}
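With the huge-page bit added, the pool index is a small bitfield that lines up with the member order in struct ttm_pool_manager above (bit 0: uncached vs. write-combined, bit 1: DMA32, bit 2: huge), and the DMA32 + huge combination is rejected outright, which is why NUM_POOLS grows to 6 rather than 8. The resulting mapping, assuming the unchanged code above this hunk still returns NULL for tt_cached:

/* cstate        dma32  huge   index  pool
 * tt_wc         no     no     0x0    wc_pool
 * tt_uncached   no     no     0x1    uc_pool
 * tt_wc         yes    no     0x2    wc_pool_dma32
 * tt_uncached   yes    no     0x3    uc_pool_dma32
 * tt_wc         no     yes    0x4    wc_pool_huge
 * tt_uncached   no     yes    0x5    uc_pool_huge
 * any           yes    yes    -      NULL (no pool; handled without pooling)
 */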
@ -494,12 +502,14 @@ static void ttm_handle_caching_state_failure(struct list_head *pages,
* pages returned in pages array.
*/
static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
int ttm_flags, enum ttm_caching_state cstate, unsigned count)
int ttm_flags, enum ttm_caching_state cstate,
unsigned count, unsigned order)
{
struct page **caching_array;
struct page *p;
int r = 0;
unsigned i, cpages;
unsigned i, j, cpages;
unsigned npages = 1 << order;
unsigned max_cpages = min(count,
(unsigned)(PAGE_SIZE/sizeof(struct page *)));
@ -512,7 +522,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
}
for (i = 0, cpages = 0; i < count; ++i) {
p = alloc_page(gfp_flags);
p = alloc_pages(gfp_flags, order);
if (!p) {
pr_err("Unable to get page %u\n", i);
@ -531,14 +541,18 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
goto out;
}
list_add(&p->lru, pages);
#ifdef CONFIG_HIGHMEM
/* gfp flags of highmem pages should never be dma32, so we
* should be fine in such a case
*/
if (!PageHighMem(p))
if (PageHighMem(p))
continue;
#endif
{
caching_array[cpages++] = p;
for (j = 0; j < npages; ++j) {
caching_array[cpages++] = p++;
if (cpages == max_cpages) {
r = ttm_set_pages_caching(caching_array,
@ -552,8 +566,6 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
cpages = 0;
}
}
list_add(&p->lru, pages);
}
if (cpages) {
@ -573,9 +585,9 @@ out:
* Fill the given pool if there aren't enough pages and the requested number of
* pages is small.
*/
static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool,
int ttm_flags, enum ttm_caching_state cstate, unsigned count,
unsigned long *irq_flags)
static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, int ttm_flags,
enum ttm_caching_state cstate,
unsigned count, unsigned long *irq_flags)
{
struct page *p;
int r;
@ -605,7 +617,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool,
INIT_LIST_HEAD(&new_pages);
r = ttm_alloc_new_pages(&new_pages, pool->gfp_flags, ttm_flags,
cstate, alloc_size);
cstate, alloc_size, 0);
spin_lock_irqsave(&pool->lock, *irq_flags);
if (!r) {
@ -627,22 +639,25 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool,
}
/**
* Cut 'count' number of pages from the pool and put them on the return list.
* Allocate pages from the pool and put them on the return list.
*
* @return count of pages still required to fulfill the request.
* @return zero for success or negative error code.
*/
static unsigned ttm_page_pool_get_pages(struct ttm_page_pool *pool,
struct list_head *pages,
int ttm_flags,
enum ttm_caching_state cstate,
unsigned count)
static int ttm_page_pool_get_pages(struct ttm_page_pool *pool,
struct list_head *pages,
int ttm_flags,
enum ttm_caching_state cstate,
unsigned count, unsigned order)
{
unsigned long irq_flags;
struct list_head *p;
unsigned i;
int r = 0;
spin_lock_irqsave(&pool->lock, irq_flags);
ttm_page_pool_fill_locked(pool, ttm_flags, cstate, count, &irq_flags);
if (!order)
ttm_page_pool_fill_locked(pool, ttm_flags, cstate, count,
&irq_flags);
if (count >= pool->npages) {
/* take all pages from the pool */
@ -672,32 +687,126 @@ static unsigned ttm_page_pool_get_pages(struct ttm_page_pool *pool,
count = 0;
out:
spin_unlock_irqrestore(&pool->lock, irq_flags);
return count;
/* clear the pages coming from the pool if requested */
if (ttm_flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
struct page *page;
list_for_each_entry(page, pages, lru) {
if (PageHighMem(page))
clear_highpage(page);
else
clear_page(page_address(page));
}
}
/* If pool didn't have enough pages allocate new one. */
if (count) {
gfp_t gfp_flags = pool->gfp_flags;
/* set zero flag for page allocation if required */
if (ttm_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
gfp_flags |= __GFP_ZERO;
/* ttm_alloc_new_pages doesn't reference pool so we can run
* multiple requests in parallel.
**/
r = ttm_alloc_new_pages(pages, gfp_flags, ttm_flags, cstate,
count, order);
}
return r;
}
/* Put all pages in pages list to correct pool to wait for reuse */
static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
enum ttm_caching_state cstate)
{
struct ttm_page_pool *pool = ttm_get_pool(flags, false, cstate);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct ttm_page_pool *huge = ttm_get_pool(flags, true, cstate);
#endif
unsigned long irq_flags;
struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
unsigned i;
if (pool == NULL) {
/* No pool for this memory type so free the pages */
for (i = 0; i < npages; i++) {
if (pages[i]) {
if (page_count(pages[i]) != 1)
pr_err("Erroneous page count. Leaking pages.\n");
__free_page(pages[i]);
pages[i] = NULL;
i = 0;
while (i < npages) {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct page *p = pages[i];
#endif
unsigned order = 0, j;
if (!pages[i]) {
++i;
continue;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
for (j = 0; j < HPAGE_PMD_NR; ++j)
if (p++ != pages[i + j])
break;
if (j == HPAGE_PMD_NR)
order = HPAGE_PMD_ORDER;
#endif
if (page_count(pages[i]) != 1)
pr_err("Erroneous page count. Leaking pages.\n");
__free_pages(pages[i], order);
j = 1 << order;
while (j) {
pages[i++] = NULL;
--j;
}
}
return;
}
i = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (huge) {
unsigned max_size, n2free;
spin_lock_irqsave(&huge->lock, irq_flags);
while (i < npages) {
struct page *p = pages[i];
unsigned j;
if (!p)
break;
for (j = 0; j < HPAGE_PMD_NR; ++j)
if (p++ != pages[i + j])
break;
if (j != HPAGE_PMD_NR)
break;
list_add_tail(&pages[i]->lru, &huge->list);
for (j = 0; j < HPAGE_PMD_NR; ++j)
pages[i++] = NULL;
huge->npages++;
}
/* Check that we don't go over the pool limit */
max_size = _manager->options.max_size;
max_size /= HPAGE_PMD_NR;
if (huge->npages > max_size)
n2free = huge->npages - max_size;
else
n2free = 0;
spin_unlock_irqrestore(&huge->lock, irq_flags);
if (n2free)
ttm_page_pool_free(huge, n2free, false);
}
#endif
spin_lock_irqsave(&pool->lock, irq_flags);
for (i = 0; i < npages; i++) {
while (i < npages) {
if (pages[i]) {
if (page_count(pages[i]) != 1)
pr_err("Erroneous page count. Leaking pages.\n");
@ -705,6 +814,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
pages[i] = NULL;
pool->npages++;
}
++i;
}
/* Check that we don't go over the pool limit */
npages = 0;
@ -727,25 +837,52 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
enum ttm_caching_state cstate)
{
struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
struct ttm_page_pool *pool = ttm_get_pool(flags, false, cstate);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct ttm_page_pool *huge = ttm_get_pool(flags, true, cstate);
#endif
struct list_head plist;
struct page *p = NULL;
gfp_t gfp_flags = GFP_USER;
unsigned count;
int r;
/* set zero flag for page allocation if required */
if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
gfp_flags |= __GFP_ZERO;
/* No pool for cached pages */
if (pool == NULL) {
gfp_t gfp_flags = GFP_USER;
unsigned i;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
unsigned j;
#endif
/* set zero flag for page allocation if required */
if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
gfp_flags |= __GFP_ZERO;
if (flags & TTM_PAGE_FLAG_DMA32)
gfp_flags |= GFP_DMA32;
else
gfp_flags |= GFP_HIGHUSER;
for (r = 0; r < npages; ++r) {
i = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
while (npages >= HPAGE_PMD_NR) {
gfp_t huge_flags = gfp_flags;
huge_flags |= GFP_TRANSHUGE;
huge_flags &= ~__GFP_MOVABLE;
huge_flags &= ~__GFP_COMP;
p = alloc_pages(huge_flags, HPAGE_PMD_ORDER);
if (!p)
break;
for (j = 0; j < HPAGE_PMD_NR; ++j)
pages[i++] = p++;
npages -= HPAGE_PMD_NR;
}
#endif
while (npages) {
p = alloc_page(gfp_flags);
if (!p) {
@ -753,50 +890,45 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
return -ENOMEM;
}
pages[r] = p;
pages[i++] = p;
--npages;
}
return 0;
}
/* combine zero flag to pool flags */
gfp_flags |= pool->gfp_flags;
/* First we take pages from the pool */
INIT_LIST_HEAD(&plist);
npages = ttm_page_pool_get_pages(pool, &plist, flags, cstate, npages);
count = 0;
list_for_each_entry(p, &plist, lru) {
pages[count++] = p;
}
/* clear the pages coming from the pool if requested */
if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
list_for_each_entry(p, &plist, lru) {
if (PageHighMem(p))
clear_highpage(p);
else
clear_page(page_address(p));
}
}
/* If pool didn't have enough pages allocate new one. */
if (npages > 0) {
/* ttm_alloc_new_pages doesn't reference pool so we can run
* multiple requests in parallel.
**/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (huge && npages >= HPAGE_PMD_NR) {
INIT_LIST_HEAD(&plist);
r = ttm_alloc_new_pages(&plist, gfp_flags, flags, cstate, npages);
ttm_page_pool_get_pages(huge, &plist, flags, cstate,
npages / HPAGE_PMD_NR,
HPAGE_PMD_ORDER);
list_for_each_entry(p, &plist, lru) {
pages[count++] = p;
}
if (r) {
/* If there is any pages in the list put them back to
* the pool. */
pr_err("Failed to allocate extra pages for large request\n");
ttm_put_pages(pages, count, flags, cstate);
return r;
unsigned j;
for (j = 0; j < HPAGE_PMD_NR; ++j)
pages[count++] = &p[j];
}
}
#endif
INIT_LIST_HEAD(&plist);
r = ttm_page_pool_get_pages(pool, &plist, flags, cstate,
npages - count, 0);
list_for_each_entry(p, &plist, lru)
pages[count++] = p;
if (r) {
/* If there are any pages in the list, put them back to
* the pool.
*/
pr_err("Failed to allocate extra pages for large request\n");
ttm_put_pages(pages, count, flags, cstate);
return r;
}
return 0;
}
@ -832,6 +964,14 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages)
ttm_page_pool_init_locked(&_manager->uc_pool_dma32,
GFP_USER | GFP_DMA32, "uc dma");
ttm_page_pool_init_locked(&_manager->wc_pool_huge,
GFP_TRANSHUGE & ~(__GFP_MOVABLE | __GFP_COMP),
"wc huge");
ttm_page_pool_init_locked(&_manager->uc_pool_huge,
GFP_TRANSHUGE & ~(__GFP_MOVABLE | __GFP_COMP),
"uc huge");
_manager->options.max_size = max_pages;
_manager->options.small = SMALL_ALLOCATION;
_manager->options.alloc_size = NUM_PAGES_TO_ALLOC;
@ -873,15 +1013,14 @@ int ttm_pool_populate(struct ttm_tt *ttm)
if (ttm->state != tt_unpopulated)
return 0;
for (i = 0; i < ttm->num_pages; ++i) {
ret = ttm_get_pages(&ttm->pages[i], 1,
ttm->page_flags,
ttm->caching_state);
if (ret != 0) {
ttm_pool_unpopulate(ttm);
return -ENOMEM;
}
ret = ttm_get_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
ttm->caching_state);
if (unlikely(ret != 0)) {
ttm_pool_unpopulate(ttm);
return ret;
}
for (i = 0; i < ttm->num_pages; ++i) {
ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
PAGE_SIZE);
if (unlikely(ret != 0)) {
@ -908,14 +1047,14 @@ void ttm_pool_unpopulate(struct ttm_tt *ttm)
unsigned i;
for (i = 0; i < ttm->num_pages; ++i) {
if (ttm->pages[i]) {
ttm_mem_global_free_page(ttm->glob->mem_glob,
ttm->pages[i], PAGE_SIZE);
ttm_put_pages(&ttm->pages[i], 1,
ttm->page_flags,
ttm->caching_state);
}
if (!ttm->pages[i])
continue;
ttm_mem_global_free_page(ttm->glob->mem_glob, ttm->pages[i],
PAGE_SIZE);
}
ttm_put_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
ttm->caching_state);
ttm->state = tt_unpopulated;
}
EXPORT_SYMBOL(ttm_pool_unpopulate);
@ -923,16 +1062,26 @@ EXPORT_SYMBOL(ttm_pool_unpopulate);
#if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU)
int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt)
{
unsigned i;
unsigned i, j;
int r;
r = ttm_pool_populate(&tt->ttm);
if (r)
return r;
for (i = 0; i < tt->ttm.num_pages; i++) {
for (i = 0; i < tt->ttm.num_pages; ++i) {
struct page *p = tt->ttm.pages[i];
size_t num_pages = 1;
for (j = i + 1; j < tt->ttm.num_pages; ++j) {
if (++p != tt->ttm.pages[j])
break;
++num_pages;
}
tt->dma_address[i] = dma_map_page(dev, tt->ttm.pages[i],
0, PAGE_SIZE,
0, num_pages * PAGE_SIZE,
DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, tt->dma_address[i])) {
while (i--) {
@ -943,6 +1092,11 @@ int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt)
ttm_pool_unpopulate(&tt->ttm);
return -EFAULT;
}
for (j = 1; j < num_pages; ++j) {
tt->dma_address[i + 1] = tt->dma_address[i] + PAGE_SIZE;
++i;
}
}
return 0;
}
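
The mapping path now coalesces physically contiguous pages (for example a transparent huge page allocated by the pool code above) into a single dma_map_page() call and fills the remaining dma_address slots with PAGE_SIZE offsets. A minimal sketch of the contiguity test it relies on (helper name is illustrative, not part of the patch):

static unsigned long example_contig_run(struct page **pages,
					unsigned long i, unsigned long total)
{
	unsigned long n = 1;

	/* Count how many following struct page pointers are contiguous so
	 * one dma_map_page() call can cover the whole run. */
	while (i + n < total && pages[i + n] == pages[i] + n)
		++n;
	return n;
}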
@ -950,13 +1104,28 @@ EXPORT_SYMBOL(ttm_populate_and_map_pages);
void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt)
{
unsigned i;
for (i = 0; i < tt->ttm.num_pages; i++) {
if (tt->dma_address[i]) {
dma_unmap_page(dev, tt->dma_address[i],
PAGE_SIZE, DMA_BIDIRECTIONAL);
unsigned i, j;
for (i = 0; i < tt->ttm.num_pages;) {
struct page *p = tt->ttm.pages[i];
size_t num_pages = 1;
if (!tt->dma_address[i] || !tt->ttm.pages[i]) {
++i;
continue;
}
for (j = i + 1; j < tt->ttm.num_pages; ++j) {
if (++p != tt->ttm.pages[j])
break;
++num_pages;
}
dma_unmap_page(dev, tt->dma_address[i], num_pages * PAGE_SIZE,
DMA_BIDIRECTIONAL);
i += num_pages;
}
ttm_pool_unpopulate(&tt->ttm);
}
@ -972,12 +1141,12 @@ int ttm_page_alloc_debugfs(struct seq_file *m, void *data)
seq_printf(m, "No pool allocator running.\n");
return 0;
}
seq_printf(m, "%6s %12s %13s %8s\n",
seq_printf(m, "%7s %12s %13s %8s\n",
h[0], h[1], h[2], h[3]);
for (i = 0; i < NUM_POOLS; ++i) {
p = &_manager->pools[i];
seq_printf(m, "%6s %12ld %13ld %8d\n",
seq_printf(m, "%7s %12ld %13ld %8d\n",
p->name, p->nrefills,
p->nfrees, p->npages);
}

View File

@ -913,6 +913,7 @@ static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
if (huge) {
gfp_flags |= GFP_TRANSHUGE;
gfp_flags &= ~__GFP_MOVABLE;
gfp_flags &= ~__GFP_COMP;
}
return gfp_flags;

View File

@ -53,6 +53,7 @@ extern "C" {
#define DRM_AMDGPU_WAIT_FENCES 0x12
#define DRM_AMDGPU_VM 0x13
#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
#define DRM_AMDGPU_SCHED 0x15
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@ -69,6 +70,7 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
#define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
#define AMDGPU_GEM_DOMAIN_CPU 0x1
#define AMDGPU_GEM_DOMAIN_GTT 0x2
@ -91,6 +93,8 @@ extern "C" {
#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5)
/* Flag that BO is always valid in this VM */
#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6)
/* Flag that BO sharing will be explicitly synchronized */
#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7)
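
A hedged userspace sketch of opting a buffer out of implicit sync with the new flag; it relies on the existing drm_amdgpu_gem_create UAPI (union, field and handle names as in this header), the function name is illustrative, and error handling plus EINTR retry are omitted:

#include <string.h>
#include <sys/ioctl.h>
#include "amdgpu_drm.h"	/* this UAPI header; install path varies */

static int example_create_explicit_sync_bo(int fd, unsigned long size,
					   unsigned int *handle)
{
	union drm_amdgpu_gem_create args;

	memset(&args, 0, sizeof(args));
	args.in.bo_size = size;
	args.in.alignment = 4096;
	args.in.domains = AMDGPU_GEM_DOMAIN_GTT;
	args.in.domain_flags = AMDGPU_GEM_CREATE_EXPLICIT_SYNC;

	if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args))
		return -1;

	*handle = args.out.handle;
	return 0;
}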
struct drm_amdgpu_gem_create_in {
/** the requested memory size */
@ -166,13 +170,22 @@ union drm_amdgpu_bo_list {
/* unknown cause */
#define AMDGPU_CTX_UNKNOWN_RESET 3
/* Context priority level */
#define AMDGPU_CTX_PRIORITY_UNSET -2048
#define AMDGPU_CTX_PRIORITY_VERY_LOW -1023
#define AMDGPU_CTX_PRIORITY_LOW -512
#define AMDGPU_CTX_PRIORITY_NORMAL 0
/* Selecting a priority above NORMAL requires CAP_SYS_NICE or DRM_MASTER */
#define AMDGPU_CTX_PRIORITY_HIGH 512
#define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023
struct drm_amdgpu_ctx_in {
/** AMDGPU_CTX_OP_* */
__u32 op;
/** For future use, no flags defined so far */
__u32 flags;
__u32 ctx_id;
__u32 _pad;
__s32 priority;
};
union drm_amdgpu_ctx_out {
@ -216,6 +229,21 @@ union drm_amdgpu_vm {
struct drm_amdgpu_vm_out out;
};
/* sched ioctl */
#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1
struct drm_amdgpu_sched_in {
/* AMDGPU_SCHED_OP_* */
__u32 op;
__u32 fd;
__s32 priority;
__u32 flags;
};
union drm_amdgpu_sched {
struct drm_amdgpu_sched_in in;
};
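
A hedged userspace sketch of the two new priority paths: allocating a context with an explicit priority through the extended drm_amdgpu_ctx_in, and overriding another process's priority through the new sched ioctl. AMDGPU_CTX_OP_ALLOC_CTX and the drm_amdgpu_ctx union come from the existing header (not shown in this hunk), the function names are illustrative, error handling is omitted, and a priority above NORMAL needs CAP_SYS_NICE or DRM_MASTER as noted above:

#include <string.h>
#include <sys/ioctl.h>
#include "amdgpu_drm.h"	/* this UAPI header; install path varies */

static int example_alloc_high_prio_ctx(int fd, unsigned int *ctx_id)
{
	union drm_amdgpu_ctx args;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	args.in.priority = AMDGPU_CTX_PRIORITY_HIGH;

	if (ioctl(fd, DRM_IOCTL_AMDGPU_CTX, &args))
		return -1;

	*ctx_id = args.out.alloc.ctx_id;
	return 0;
}

static int example_override_process_priority(int fd, int victim_fd)
{
	union drm_amdgpu_sched args;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE;
	args.in.fd = victim_fd;
	args.in.priority = AMDGPU_CTX_PRIORITY_LOW;

	return ioctl(fd, DRM_IOCTL_AMDGPU_SCHED, &args);
}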
/*
* This is not a reliable API and you should expect it to fail for any
* number of reasons and have fallback path that do not use userptr to
@ -629,6 +657,7 @@ struct drm_amdgpu_cs_chunk_data {
#define AMDGPU_INFO_SENSOR_VDDGFX 0x7
/* Number of VRAM page faults on CPU access. */
#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E
#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F
#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
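
A hedged userspace sketch of reading the new VRAM-lost counter; struct drm_amdgpu_info and DRM_IOCTL_AMDGPU_INFO are assumed from the existing UAPI, the function name is illustrative, and error handling is omitted. A change in the returned value across a GPU reset tells userspace that VRAM contents may have been lost:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include "amdgpu_drm.h"	/* this UAPI header; install path varies */

static int example_query_vram_lost(int fd, uint32_t *counter)
{
	struct drm_amdgpu_info request;

	memset(&request, 0, sizeof(request));
	request.return_pointer = (uintptr_t)counter;
	request.return_size = sizeof(*counter);
	request.query = AMDGPU_INFO_VRAM_LOST_COUNTER;

	return ioctl(fd, DRM_IOCTL_AMDGPU_INFO, &request);
}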