drm/radeon: validate relocations in the order determined by userspace v3
Userspace should set the first 4 bits of drm_radeon_cs_reloc::flags to a number from 0 to 15. The higher the number, the higher the priority, which means a buffer with a higher number will be validated sooner. The old behavior is preserved: Buffers used for write are prioritized over read-only buffers if the userspace doesn't set the number. v2: add buffers to buckets directly, then concatenate them v3: use a stable sort Signed-off-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com>
This commit is contained in:
parent
4330441a74
commit
c9b7654889
|
@ -483,7 +483,6 @@ struct radeon_bo_list {
|
|||
struct ttm_validate_buffer tv;
|
||||
struct radeon_bo *bo;
|
||||
uint64_t gpu_offset;
|
||||
bool written;
|
||||
unsigned domain;
|
||||
unsigned alt_domain;
|
||||
u32 tiling_flags;
|
||||
|
|
|
@ -31,10 +31,52 @@
|
|||
#include "radeon.h"
|
||||
#include "radeon_trace.h"
|
||||
|
||||
#define RADEON_CS_MAX_PRIORITY 32u
|
||||
#define RADEON_CS_NUM_BUCKETS (RADEON_CS_MAX_PRIORITY + 1)
|
||||
|
||||
/* This is based on the bucket sort with O(n) time complexity.
|
||||
* An item with priority "i" is added to bucket[i]. The lists are then
|
||||
* concatenated in descending order.
|
||||
*/
|
||||
struct radeon_cs_buckets {
|
||||
struct list_head bucket[RADEON_CS_NUM_BUCKETS];
|
||||
};
|
||||
|
||||
static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
|
||||
INIT_LIST_HEAD(&b->bucket[i]);
|
||||
}
|
||||
|
||||
static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
|
||||
struct list_head *item, unsigned priority)
|
||||
{
|
||||
/* Since buffers which appear sooner in the relocation list are
|
||||
* likely to be used more often than buffers which appear later
|
||||
* in the list, the sort mustn't change the ordering of buffers
|
||||
* with the same priority, i.e. it must be stable.
|
||||
*/
|
||||
list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
|
||||
}
|
||||
|
||||
static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
|
||||
struct list_head *out_list)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
/* Connect the sorted buckets in the output list. */
|
||||
for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
|
||||
list_splice(&b->bucket[i], out_list);
|
||||
}
|
||||
}
|
||||
|
||||
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
|
||||
{
|
||||
struct drm_device *ddev = p->rdev->ddev;
|
||||
struct radeon_cs_chunk *chunk;
|
||||
struct radeon_cs_buckets buckets;
|
||||
unsigned i, j;
|
||||
bool duplicate;
|
||||
|
||||
|
@ -53,8 +95,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
|
|||
if (p->relocs == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
radeon_cs_buckets_init(&buckets);
|
||||
|
||||
for (i = 0; i < p->nrelocs; i++) {
|
||||
struct drm_radeon_cs_reloc *r;
|
||||
unsigned priority;
|
||||
|
||||
duplicate = false;
|
||||
r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
|
||||
|
@ -80,7 +126,14 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
|
|||
p->relocs_ptr[i] = &p->relocs[i];
|
||||
p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
|
||||
p->relocs[i].lobj.bo = p->relocs[i].robj;
|
||||
p->relocs[i].lobj.written = !!r->write_domain;
|
||||
|
||||
/* The userspace buffer priorities are from 0 to 15. A higher
|
||||
* number means the buffer is more important.
|
||||
* Also, the buffers used for write have a higher priority than
|
||||
* the buffers used for read only, which doubles the range
|
||||
* to 0 to 31. 32 is reserved for the kernel driver.
|
||||
*/
|
||||
priority = (r->flags & 0xf) * 2 + !!r->write_domain;
|
||||
|
||||
/* the first reloc of an UVD job is the msg and that must be in
|
||||
VRAM, also but everything into VRAM on AGP cards to avoid
|
||||
|
@ -94,6 +147,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
|
|||
p->relocs[i].lobj.alt_domain =
|
||||
RADEON_GEM_DOMAIN_VRAM;
|
||||
|
||||
/* prioritize this over any other relocation */
|
||||
priority = RADEON_CS_MAX_PRIORITY;
|
||||
} else {
|
||||
uint32_t domain = r->write_domain ?
|
||||
r->write_domain : r->read_domains;
|
||||
|
@ -107,9 +162,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
|
|||
p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
|
||||
p->relocs[i].handle = r->handle;
|
||||
|
||||
radeon_bo_list_add_object(&p->relocs[i].lobj,
|
||||
&p->validated);
|
||||
radeon_cs_buckets_add(&buckets, &p->relocs[i].lobj.tv.head,
|
||||
priority);
|
||||
}
|
||||
|
||||
radeon_cs_buckets_get_list(&buckets, &p->validated);
|
||||
|
||||
return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
|
||||
}
|
||||
|
||||
|
|
|
@ -366,16 +366,6 @@ void radeon_bo_fini(struct radeon_device *rdev)
|
|||
arch_phys_wc_del(rdev->mc.vram_mtrr);
|
||||
}
|
||||
|
||||
void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
|
||||
struct list_head *head)
|
||||
{
|
||||
if (lobj->written) {
|
||||
list_add(&lobj->tv.head, head);
|
||||
} else {
|
||||
list_add_tail(&lobj->tv.head, head);
|
||||
}
|
||||
}
|
||||
|
||||
int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
|
||||
struct list_head *head, int ring)
|
||||
{
|
||||
|
|
|
@ -138,8 +138,6 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev);
|
|||
extern void radeon_bo_force_delete(struct radeon_device *rdev);
|
||||
extern int radeon_bo_init(struct radeon_device *rdev);
|
||||
extern void radeon_bo_fini(struct radeon_device *rdev);
|
||||
extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
|
||||
struct list_head *head);
|
||||
extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
|
||||
struct list_head *head, int ring);
|
||||
extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
|
||||
|
|
Loading…
Reference in New Issue