drm/amdgpu: move get_user_pages out of amdgpu_ttm_tt_pin_userptr v6
That avoids lock inversion between the BO reservation lock and the anon_vma lock.

v2: * Changed amdgpu_bo_list_entry.user_pages to an array of pointers
    * Lock mmap_sem only for get_user_pages
    * Added invalidation of unbound userpointer BOs
    * Fixed memory leak and page reference leak

v3 (chk): * Revert locking mmap_sem only for get_user_pages
          * Revert adding invalidation of unbound userpointer BOs
          * Sanitize and fix error handling

v4 (chk): * Init userpages pointer everywhere.
          * Fix error handling when get_user_pages() fails.
          * Add invalidation of unbound userpointer BOs again.

v5 (chk): * Add maximum number of tries.

v6 (chk): * Fix error handling when we run out of tries.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> (v4)
Acked-by: Alex Deucher <alexander.deucher@amd.com>
commit 2f568dbd6b (parent d564a06e1c)
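Note: the shape of the fix is easier to see outside the diff. Page references are now taken while no BO reservation is held; only afterwards is the reservation acquired, and an invalidation counter is re-checked under it. If the counter moved, the pages are dropped and the whole sequence is retried, at most ten times. The following is a minimal userspace model of that ordering, not kernel code: fake_get_user_pages(), the reservation mutex, the invalidations counter and MAX_TRIES are invented stand-ins for get_user_pages(), the TTM reservation, gtt->mmu_invalidations and the tries = 10 limit in amdgpu_cs_parser_bos().

/* Minimal userspace model of "get pages first, reserve later, retry on
 * invalidation".  All names are illustrative stand-ins, not kernel symbols. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define MAX_TRIES 10

static pthread_mutex_t reservation = PTHREAD_MUTEX_INITIALIZER; /* stands in for the BO reservation */
static atomic_int invalidations;                                 /* stands in for gtt->mmu_invalidations */

/* Stand-in for get_user_pages(): runs with NO reservation held, so it can
 * take mm-side locks without creating a reservation -> anon_vma ordering. */
static int fake_get_user_pages(int *pages, int n)
{
        for (int i = 0; i < n; i++)
                pages[i] = i;   /* pretend each entry now references a pinned page */
        return 0;
}

/* Acquire pages first, then reserve and re-check the invalidation counter;
 * on success the "reservation" is returned held, as during submission. */
static int validate_userptr_bo(int *last_invalidated)
{
        int pages[4];
        int tries = MAX_TRIES;

        while (1) {
                if (fake_get_user_pages(pages, 4))
                        return -1;

                pthread_mutex_lock(&reservation);
                int now = atomic_load(&invalidations);
                if (now == *last_invalidated)
                        return 0;                 /* pages still valid, keep the lock */
                pthread_mutex_unlock(&reservation);

                *last_invalidated = now;          /* drop the stale pages and retry */
                if (!--tries)
                        return -35;               /* mirrors the -EDEADLK bail-out */
        }
}

int main(void)
{
        int last = atomic_load(&invalidations);

        if (validate_userptr_bo(&last) == 0) {
                /* ... submission would run here with the reservation held ... */
                pthread_mutex_unlock(&reservation);
        }
        return 0;
}

Because the page walk never runs under the reservation, the reservation -> anon_vma ordering that caused the reported lock inversion can no longer occur.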
@@ -471,6 +471,8 @@ struct amdgpu_bo_list_entry {
 	struct ttm_validate_buffer tv;
 	struct amdgpu_bo_va *bo_va;
 	uint32_t priority;
+	struct page **user_pages;
+	int user_invalidated;
 };
 
 struct amdgpu_bo_va_mapping {
@@ -2365,11 +2367,14 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 		       struct amdgpu_ring **out_ring);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 			      uint32_t flags);
 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end);
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+				       int *last_invalidated);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 				 struct ttm_mem_reg *mem);
@@ -200,6 +200,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 
 		list_add_tail(&list->array[i].tv.head,
 			      &bucket[priority]);
+		list->array[i].user_pages = NULL;
 	}
 
 	/* Connect the sorted buckets in the output list. */
@@ -111,6 +111,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	p->uf_entry.priority = 0;
 	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
 	p->uf_entry.tv.shared = true;
+	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_unreference_unlocked(gobj);
 	return 0;
@@ -297,6 +298,7 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 
 	list_for_each_entry(lobj, validated, tv.head) {
 		struct amdgpu_bo *bo = lobj->robj;
+		bool binding_userptr = false;
 		struct mm_struct *usermm;
 		uint32_t domain;
 
@@ -304,6 +306,15 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		if (usermm && usermm != current->mm)
 			return -EPERM;
 
+		/* Check if we have user pages and nobody bound the BO already */
+		if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) {
+			size_t size = sizeof(struct page *);
+
+			size *= bo->tbo.ttm->num_pages;
+			memcpy(bo->tbo.ttm->pages, lobj->user_pages, size);
+			binding_userptr = true;
+		}
+
 		if (bo->pin_count)
 			continue;
 
@@ -334,6 +345,11 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 			}
 			return r;
 		}
+
+		if (binding_userptr) {
+			drm_free_large(lobj->user_pages);
+			lobj->user_pages = NULL;
+		}
 	}
 	return 0;
 }
@@ -342,8 +358,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 				  union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_bo_list_entry *e;
 	struct list_head duplicates;
 	bool need_mmap_lock = false;
+	unsigned i, tries = 10;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
@@ -364,9 +382,81 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (need_mmap_lock)
 		down_read(&current->mm->mmap_sem);
 
-	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
-	if (unlikely(r != 0))
-		goto error_reserve;
+	while (1) {
+		struct list_head need_pages;
+		unsigned i;
+
+		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+					   &duplicates);
+		if (unlikely(r != 0))
+			goto error_free_pages;
+
+		/* Without a BO list we don't have userptr BOs */
+		if (!p->bo_list)
+			break;
+
+		INIT_LIST_HEAD(&need_pages);
+		for (i = p->bo_list->first_userptr;
+		     i < p->bo_list->num_entries; ++i) {
+
+			e = &p->bo_list->array[i];
+
+			if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm,
+				 &e->user_invalidated) && e->user_pages) {
+
+				/* We acquired a page array, but somebody
+				 * invalidated it. Free it and try again
+				 */
+				release_pages(e->user_pages,
+					      e->robj->tbo.ttm->num_pages,
+					      false);
+				drm_free_large(e->user_pages);
+				e->user_pages = NULL;
+			}
+
+			if (e->robj->tbo.ttm->state != tt_bound &&
+			    !e->user_pages) {
+				list_del(&e->tv.head);
+				list_add(&e->tv.head, &need_pages);
+
+				amdgpu_bo_unreserve(e->robj);
+			}
+		}
+
+		if (list_empty(&need_pages))
+			break;
+
+		/* Unreserve everything again. */
+		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+
+		/* We tried too often, just abort */
+		if (!--tries) {
+			r = -EDEADLK;
+			goto error_free_pages;
+		}
+
+		/* Fill the page arrays for all userptrs. */
+		list_for_each_entry(e, &need_pages, tv.head) {
+			struct ttm_tt *ttm = e->robj->tbo.ttm;
+
+			e->user_pages = drm_calloc_large(ttm->num_pages,
+							 sizeof(struct page*));
+			if (!e->user_pages) {
+				r = -ENOMEM;
+				goto error_free_pages;
+			}
+
+			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
+			if (r) {
+				drm_free_large(e->user_pages);
+				e->user_pages = NULL;
+				goto error_free_pages;
+			}
+		}
+
+		/* And try again. */
+		list_splice(&need_pages, &p->validated);
+	}
 
 	amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
@@ -398,10 +488,26 @@ error_validate:
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
 	}
 
-error_reserve:
+error_free_pages:
+
 	if (need_mmap_lock)
 		up_read(&current->mm->mmap_sem);
 
+	if (p->bo_list) {
+		for (i = p->bo_list->first_userptr;
+		     i < p->bo_list->num_entries; ++i) {
+			e = &p->bo_list->array[i];
+
+			if (!e->user_pages)
+				continue;
+
+			release_pages(e->user_pages,
+				      e->robj->tbo.ttm->num_pages,
+				      false);
+			drm_free_large(e->user_pages);
+		}
+	}
+
 	return r;
 }
@@ -274,18 +274,23 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
 	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
 		down_read(&current->mm->mmap_sem);
+
+		r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+						 bo->tbo.ttm->pages);
+		if (r)
+			goto unlock_mmap_sem;
+
 		r = amdgpu_bo_reserve(bo, true);
-		if (r) {
-			up_read(&current->mm->mmap_sem);
-			goto release_object;
-		}
+		if (r)
+			goto free_pages;
 
 		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 		amdgpu_bo_unreserve(bo);
-		up_read(&current->mm->mmap_sem);
 		if (r)
-			goto release_object;
+			goto free_pages;
+
+		up_read(&current->mm->mmap_sem);
 	}
 
 	r = drm_gem_handle_create(filp, gobj, &handle);
@@ -297,6 +302,12 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	args->handle = handle;
 	return 0;
 
+free_pages:
+	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false);
+
+unlock_mmap_sem:
+	up_read(&current->mm->mmap_sem);
+
 release_object:
 	drm_gem_object_unreference_unlocked(gobj);
 
@@ -508,22 +508,18 @@ struct amdgpu_ttm_tt {
 	uint32_t userflags;
 	spinlock_t guptasklock;
 	struct list_head guptasks;
 	atomic_t mmu_invalidations;
 };
 
-/* prepare the sg table with the user pages */
-static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
-	struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	unsigned pinned = 0, nents;
+	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+	unsigned pinned = 0;
 	int r;
 
-	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
-	enum dma_data_direction direction = write ?
-		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
-
 	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
-		/* check that we only pin down anonymous memory
+		/* check that we only use anonymous memory
 		   to prevent problems with writeback */
 		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
 		struct vm_area_struct *vma;
@@ -536,7 +532,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 	do {
 		unsigned num_pages = ttm->num_pages - pinned;
 		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
-		struct page **pages = ttm->pages + pinned;
+		struct page **p = pages + pinned;
 		struct amdgpu_ttm_gup_task_list guptask;
 
 		guptask.task = current;
@@ -545,7 +541,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 		spin_unlock(&gtt->guptasklock);
 
 		r = get_user_pages(current, current->mm, userptr, num_pages,
-				   write, 0, pages, NULL);
+				   write, 0, p, NULL);
 
 		spin_lock(&gtt->guptasklock);
 		list_del(&guptask.list);
@@ -558,6 +554,25 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 
 	} while (pinned < ttm->num_pages);
 
+	return 0;
+
+release_pages:
+	release_pages(pages, pinned, 0);
+	return r;
+}
+
+/* prepare the sg table with the user pages */
+static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+{
+	struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	unsigned nents;
+	int r;
+
+	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+	enum dma_data_direction direction = write ?
+		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
 	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
 				      ttm->num_pages << PAGE_SHIFT,
 				      GFP_KERNEL);
@@ -576,9 +591,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 
 release_sg:
 	kfree(ttm->sg);
-
-release_pages:
-	release_pages(ttm->pages, pinned, 0);
 	return r;
 }
 
@@ -803,6 +815,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 	gtt->userflags = flags;
 	spin_lock_init(&gtt->guptasklock);
 	INIT_LIST_HEAD(&gtt->guptasks);
+	atomic_set(&gtt->mmu_invalidations, 0);
 
 	return 0;
 }
@@ -840,9 +853,21 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 	}
 	spin_unlock(&gtt->guptasklock);
 
+	atomic_inc(&gtt->mmu_invalidations);
+
 	return true;
 }
 
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+				       int *last_invalidated)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	int prev_invalidated = *last_invalidated;
+
+	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
+	return prev_invalidated != *last_invalidated;
+}
+
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -95,6 +95,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	entry->priority = 0;
 	entry->tv.bo = &vm->page_directory->tbo;
 	entry->tv.shared = true;
+	entry->user_pages = NULL;
 	list_add(&entry->tv.head, validated);
 }
 
@@ -1186,6 +1187,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 		entry->priority = 0;
 		entry->tv.bo = &entry->robj->tbo;
 		entry->tv.shared = true;
+		entry->user_pages = NULL;
 		vm->page_tables[pt_idx].addr = 0;
 	}
 
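The invalidation check added by this patch is only a generation counter: the MMU notifier path, amdgpu_ttm_tt_affect_userptr(), bumps gtt->mmu_invalidations, and amdgpu_ttm_tt_userptr_invalidated() reports whether the counter moved since the caller's last snapshot. A standalone model of that handshake, with invented names (notifier_invalidate, userptr_invalidated), could look like this:

/* Standalone model of the mmu_invalidations generation counter.
 * Names are illustrative; only the compare-snapshot idea mirrors the patch. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int mmu_invalidations;   /* bumped by the "notifier" side */

/* Models amdgpu_ttm_tt_affect_userptr(): record that the range changed. */
static void notifier_invalidate(void)
{
        atomic_fetch_add(&mmu_invalidations, 1);
}

/* Models amdgpu_ttm_tt_userptr_invalidated(): did the counter move since
 * the caller last looked?  Updates the caller's snapshot either way. */
static bool userptr_invalidated(int *last_invalidated)
{
        int prev = *last_invalidated;

        *last_invalidated = atomic_load(&mmu_invalidations);
        return prev != *last_invalidated;
}

int main(void)
{
        int last = 0;

        printf("%d\n", userptr_invalidated(&last)); /* 0: nothing happened yet */
        notifier_invalidate();
        printf("%d\n", userptr_invalidated(&last)); /* 1: pages must be re-acquired */
        printf("%d\n", userptr_invalidated(&last)); /* 0: snapshot is current again */
        return 0;
}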