From 751580b3ff9ac6bf39da8586e132dbebee2409ef Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 16 Jun 2021 09:51:47 -0400 Subject: [PATCH] drm/amdkfd: add sysfs counters for vm fault and migration This is part of SVM profiling API, export sysfs counters for per-process, per-GPU vm retry fault, pages migrated in and out of GPU vram. counters will not be updated in parallel in GPU retry fault handler and migration to vram/ram path, use READ_ONCE to avoid compiler optimization. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 151 ++++++++++++++++++----- 2 files changed, 131 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 6dc22fa1e555..3426743ed228 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -730,6 +730,15 @@ struct kfd_process_device { * number of CU's a device has along with number of other competing processes */ struct attribute attr_cu_occupancy; + + /* sysfs counters for GPU retry fault and page migration tracking */ + struct kobject *kobj_counters; + struct attribute attr_faults; + struct attribute attr_page_in; + struct attribute attr_page_out; + uint64_t faults; + uint64_t page_in; + uint64_t page_out; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index cfc36fceac8a..21ec8a18cad2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -416,6 +416,29 @@ static ssize_t kfd_procfs_stats_show(struct kobject *kobj, return 0; } +static ssize_t kfd_sysfs_counters_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kfd_process_device *pdd; + + if (!strcmp(attr->name, "faults")) { + pdd = container_of(attr, struct kfd_process_device, + attr_faults); + return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->faults)); + } + if (!strcmp(attr->name, "page_in")) { + pdd = container_of(attr, struct kfd_process_device, + attr_page_in); + return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_in)); + } + if (!strcmp(attr->name, "page_out")) { + pdd = container_of(attr, struct kfd_process_device, + attr_page_out); + return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_out)); + } + return 0; +} + static struct attribute attr_queue_size = { .name = "size", .mode = KFD_SYSFS_FILE_MODE @@ -456,6 +479,15 @@ static struct kobj_type procfs_stats_type = { .release = kfd_procfs_kobj_release, }; +static const struct sysfs_ops sysfs_counters_ops = { + .show = kfd_sysfs_counters_show, +}; + +static struct kobj_type sysfs_counters_type = { + .sysfs_ops = &sysfs_counters_ops, + .release = kfd_procfs_kobj_release, +}; + int kfd_procfs_add_queue(struct queue *q) { struct kfd_process *proc; @@ -544,6 +576,50 @@ static void kfd_procfs_add_sysfs_stats(struct kfd_process *p) } } +static void kfd_procfs_add_sysfs_counters(struct kfd_process *p) +{ + int ret = 0; + int i; + char counters_dir_filename[MAX_SYSFS_FILENAME_LEN]; + + if (!p || !p->kobj) + return; + + /* + * Create sysfs files for each GPU which supports SVM + * - proc//counters_/ + * - proc//counters_/faults + * - proc//counters_/page_in + * - proc//counters_/page_out + */ + for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) { + struct kfd_process_device *pdd = p->pdds[i]; + struct kobject *kobj_counters; + + snprintf(counters_dir_filename, MAX_SYSFS_FILENAME_LEN, + "counters_%u", pdd->dev->id); + kobj_counters = kfd_alloc_struct(kobj_counters); + if (!kobj_counters) + return; + + ret = kobject_init_and_add(kobj_counters, &sysfs_counters_type, + p->kobj, counters_dir_filename); + if (ret) { + pr_warn("Creating KFD proc/%s folder failed", + counters_dir_filename); + kobject_put(kobj_counters); + return; + } + + pdd->kobj_counters = kobj_counters; + kfd_sysfs_create_file(kobj_counters, &pdd->attr_faults, + "faults"); + kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_in, + "page_in"); + kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_out, + "page_out"); + } +} static void kfd_procfs_add_sysfs_files(struct kfd_process *p) { @@ -777,6 +853,7 @@ struct kfd_process *kfd_create_process(struct file *filep) kfd_procfs_add_sysfs_stats(process); kfd_procfs_add_sysfs_files(process); + kfd_procfs_add_sysfs_counters(process); } out: if (!IS_ERR(process)) @@ -919,6 +996,50 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) p->n_pdds = 0; } +static void kfd_process_remove_sysfs(struct kfd_process *p) +{ + struct kfd_process_device *pdd; + int i; + + if (!p->kobj) + return; + + sysfs_remove_file(p->kobj, &p->attr_pasid); + kobject_del(p->kobj_queues); + kobject_put(p->kobj_queues); + p->kobj_queues = NULL; + + for (i = 0; i < p->n_pdds; i++) { + pdd = p->pdds[i]; + + sysfs_remove_file(p->kobj, &pdd->attr_vram); + sysfs_remove_file(p->kobj, &pdd->attr_sdma); + + sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict); + if (pdd->dev->kfd2kgd->get_cu_occupancy) + sysfs_remove_file(pdd->kobj_stats, + &pdd->attr_cu_occupancy); + kobject_del(pdd->kobj_stats); + kobject_put(pdd->kobj_stats); + pdd->kobj_stats = NULL; + } + + for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) { + pdd = p->pdds[i]; + + sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults); + sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in); + sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_out); + kobject_del(pdd->kobj_counters); + kobject_put(pdd->kobj_counters); + pdd->kobj_counters = NULL; + } + + kobject_del(p->kobj); + kobject_put(p->kobj); + p->kobj = NULL; +} + /* No process locking is needed in this function, because the process * is not findable any more. We must assume that no other thread is * using it any more, otherwise we couldn't safely free the process @@ -928,35 +1049,7 @@ static void kfd_process_wq_release(struct work_struct *work) { struct kfd_process *p = container_of(work, struct kfd_process, release_work); - int i; - - /* Remove the procfs files */ - if (p->kobj) { - sysfs_remove_file(p->kobj, &p->attr_pasid); - kobject_del(p->kobj_queues); - kobject_put(p->kobj_queues); - p->kobj_queues = NULL; - - for (i = 0; i < p->n_pdds; i++) { - struct kfd_process_device *pdd = p->pdds[i]; - - sysfs_remove_file(p->kobj, &pdd->attr_vram); - sysfs_remove_file(p->kobj, &pdd->attr_sdma); - - sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict); - if (pdd->dev->kfd2kgd->get_cu_occupancy) - sysfs_remove_file(pdd->kobj_stats, - &pdd->attr_cu_occupancy); - kobject_del(pdd->kobj_stats); - kobject_put(pdd->kobj_stats); - pdd->kobj_stats = NULL; - } - - kobject_del(p->kobj); - kobject_put(p->kobj); - p->kobj = NULL; - } - + kfd_process_remove_sysfs(p); kfd_iommu_unbind_process(p); kfd_process_free_outstanding_kfd_bos(p);