hmm related patches for 5.8
Merge tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull hmm updates from Jason Gunthorpe:

 "This series adds a selftest for hmm_range_fault() and several of the
  DEVICE_PRIVATE migration related actions, and another simplification
  for hmm_range_fault()'s API.

   - Simplify hmm_range_fault() with a simpler return code, no
     HMM_PFN_SPECIAL, and no customizable output PFN format

   - Add a selftest for hmm_range_fault() and DEVICE_PRIVATE related
     functionality"

* tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  MAINTAINERS: add HMM selftests
  mm/hmm/test: add selftests for HMM
  mm/hmm/test: add selftest driver for HMM
  mm/hmm: remove the customizable pfn format from hmm_range_fault
  mm/hmm: remove HMM_PFN_SPECIAL
  drm/amdgpu: remove dead code after hmm_range_fault()
  mm/hmm: make hmm_range_fault return 0 or -1
This commit is contained in:
commit cfa3b8068b
@@ -161,7 +161,7 @@ device must complete the update before the driver callback returns.
 When the device driver wants to populate a range of virtual addresses, it can
 use::

-  long hmm_range_fault(struct hmm_range *range);
+  int hmm_range_fault(struct hmm_range *range);

 It will trigger a page fault on missing or read-only entries if write access is
 requested (see below). Page faults use the generic mm page fault code path just
@@ -184,10 +184,7 @@ The usage pattern is::
       range.notifier = &interval_sub;
       range.start = ...;
       range.end = ...;
-      range.pfns = ...;
-      range.flags = ...;
-      range.values = ...;
-      range.pfn_shift = ...;
+      range.hmm_pfns = ...;

       if (!mmget_not_zero(interval_sub->notifier.mm))
           return -EFAULT;
@@ -229,15 +226,10 @@ The hmm_range struct has 2 fields, default_flags and pfn_flags_mask, that specify
 fault or snapshot policy for the whole range instead of having to set them
 for each entry in the pfns array.

-For instance, if the device flags for range.flags are::
+For instance if the device driver wants pages for a range with at least read
+permission, it sets::

-    range.flags[HMM_PFN_VALID] = (1 << 63);
-    range.flags[HMM_PFN_WRITE] = (1 << 62);
-
-and the device driver wants pages for a range with at least read permission,
-it sets::
-
-    range->default_flags = (1 << 63);
+    range->default_flags = HMM_PFN_REQ_FAULT;
     range->pfn_flags_mask = 0;

 and calls hmm_range_fault() as described above. This will fill fault all pages
@@ -246,18 +238,18 @@ in the range with at least read permission.
 Now let's say the driver wants to do the same except for one page in the range for
 which it wants to have write permission. Now driver set::

-    range->default_flags = (1 << 63);
-    range->pfn_flags_mask = (1 << 62);
-    range->pfns[index_of_write] = (1 << 62);
+    range->default_flags = HMM_PFN_REQ_FAULT;
+    range->pfn_flags_mask = HMM_PFN_REQ_WRITE;
+    range->pfns[index_of_write] = HMM_PFN_REQ_WRITE;

 With this, HMM will fault in all pages with at least read (i.e., valid) and for the
 address == range->start + (index_of_write << PAGE_SHIFT) it will fault with
 write permission i.e., if the CPU pte does not have write permission set then HMM
 will call handle_mm_fault().

-Note that HMM will populate the pfns array with write permission for any page
-that is mapped with CPU write permission no matter what values are set
-in default_flags or pfn_flags_mask.
+After hmm_range_fault completes the flag bits are set to the current state of
+the page tables, ie HMM_PFN_VALID | HMM_PFN_WRITE will be set if the page is
+writable.


 Represent and manage device memory from core kernel point of view
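To make the documented pattern above concrete, here is a minimal sketch of a
driver fault path under the simplified API. It follows the usage pattern in
the hmm.rst hunk; the interval_sub object, the NPAGES bound, and the
take_lock()/release_lock() helpers are assumptions for illustration, not part
of the API::

	/* Sketch only: interval_sub, NPAGES, take_lock/release_lock are assumed. */
	unsigned long hmm_pfns[NPAGES];
	struct hmm_range range = {
		.notifier = &interval_sub->notifier,
		.start = start,
		.end = end,
		.hmm_pfns = hmm_pfns,
		.default_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
	};
	struct mm_struct *mm = interval_sub->notifier.mm;
	int ret;

	if (!mmget_not_zero(mm))
		return -EFAULT;
again:
	range.notifier_seq = mmu_interval_read_begin(&interval_sub->notifier);
	down_read(&mm->mmap_sem);
	ret = hmm_range_fault(&range);
	up_read(&mm->mmap_sem);
	if (ret) {
		if (ret == -EBUSY)	/* notifier sequence moved, just retry */
			goto again;
		goto out;
	}

	take_lock(driver_lock);
	if (mmu_interval_read_retry(&interval_sub->notifier,
				    range.notifier_seq)) {
		release_lock(driver_lock);
		goto again;
	}
	/* each hmm_pfns[] entry now carries HMM_PFN_VALID/WRITE plus the PFN */
	release_lock(driver_lock);
out:
	mmput(mm);
	return ret;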
@@ -7768,7 +7768,9 @@ L:	linux-mm@kvack.org
 S:	Maintained
 F:	Documentation/vm/hmm.rst
 F:	include/linux/hmm*
+F:	lib/test_hmm*
 F:	mm/hmm*
+F:	tools/testing/selftests/vm/*hmm*

 HOST AP DRIVER
 M:	Jouni Malinen <j@w1.fi>
@@ -766,18 +766,6 @@ struct amdgpu_ttm_tt {
 };

 #ifdef CONFIG_DRM_AMDGPU_USERPTR
-/* flags used by HMM internal, not related to CPU/GPU PTE flags */
-static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
-	(1 << 0), /* HMM_PFN_VALID */
-	(1 << 1), /* HMM_PFN_WRITE */
-};
-
-static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
-	0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
-	0, /* HMM_PFN_NONE */
-	0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
-};
-
 /**
  * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
  * memory and start HMM tracking CPU page table update
@@ -816,18 +804,15 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 		goto out;
 	}
 	range->notifier = &bo->notifier;
-	range->flags = hmm_range_flags;
-	range->values = hmm_range_values;
-	range->pfn_shift = PAGE_SHIFT;
 	range->start = bo->notifier.interval_tree.start;
 	range->end = bo->notifier.interval_tree.last + 1;
-	range->default_flags = hmm_range_flags[HMM_PFN_VALID];
+	range->default_flags = HMM_PFN_REQ_FAULT;
 	if (!amdgpu_ttm_tt_is_readonly(ttm))
-		range->default_flags |= range->flags[HMM_PFN_WRITE];
+		range->default_flags |= HMM_PFN_REQ_WRITE;

-	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns),
-				     GFP_KERNEL);
-	if (unlikely(!range->pfns)) {
+	range->hmm_pfns = kvmalloc_array(ttm->num_pages,
+					 sizeof(*range->hmm_pfns), GFP_KERNEL);
+	if (unlikely(!range->hmm_pfns)) {
 		r = -ENOMEM;
 		goto out_free_ranges;
 	}
@@ -852,27 +837,23 @@ retry:
 	down_read(&mm->mmap_sem);
 	r = hmm_range_fault(range);
 	up_read(&mm->mmap_sem);
-	if (unlikely(r <= 0)) {
+	if (unlikely(r)) {
 		/*
 		 * FIXME: This timeout should encompass the retry from
 		 * mmu_interval_read_retry() as well.
 		 */
-		if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout))
+		if (r == -EBUSY && !time_after(jiffies, timeout))
 			goto retry;
 		goto out_free_pfns;
 	}

-	for (i = 0; i < ttm->num_pages; i++) {
-		/* FIXME: The pages cannot be touched outside the notifier_lock */
-		pages[i] = hmm_device_entry_to_page(range, range->pfns[i]);
-		if (unlikely(!pages[i])) {
-			pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
-			       i, range->pfns[i]);
-			r = -ENOMEM;
-
-			goto out_free_pfns;
-		}
-	}
+	/*
+	 * Due to default_flags, all pages are HMM_PFN_VALID or
+	 * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
+	 * the notifier_lock, and mmu_interval_read_retry() must be done first.
+	 */
+	for (i = 0; i < ttm->num_pages; i++)
+		pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);

 	gtt->range = range;
 	mmput(mm);
@@ -882,7 +863,7 @@ retry:
 out_unlock:
 	up_read(&mm->mmap_sem);
 out_free_pfns:
-	kvfree(range->pfns);
+	kvfree(range->hmm_pfns);
 out_free_ranges:
 	kfree(range);
 out:
@@ -907,7 +888,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
 	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
 		gtt->userptr, ttm->num_pages);

-	WARN_ONCE(!gtt->range || !gtt->range->pfns,
+	WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
 		  "No user pages to check\n");

 	if (gtt->range) {
@@ -917,7 +898,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
 		 */
 		r = mmu_interval_read_retry(gtt->range->notifier,
 					    gtt->range->notifier_seq);
-		kvfree(gtt->range->pfns);
+		kvfree(gtt->range->hmm_pfns);
 		kfree(gtt->range);
 		gtt->range = NULL;
 	}
@@ -1008,8 +989,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)

 		for (i = 0; i < ttm->num_pages; i++) {
 			if (ttm->pages[i] !=
-				hmm_device_entry_to_page(gtt->range,
-					      gtt->range->pfns[i]))
+			    hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
 				break;
 		}

@@ -85,7 +85,7 @@ static inline struct nouveau_dmem *page_to_dmem(struct page *page)
 	return container_of(page->pgmap, struct nouveau_dmem, pagemap);
 }

-static unsigned long nouveau_dmem_page_addr(struct page *page)
+unsigned long nouveau_dmem_page_addr(struct page *page)
 {
 	struct nouveau_dmem_chunk *chunk = page->zone_device_data;
 	unsigned long idx = page_to_pfn(page) - chunk->pfn_first;
@@ -671,28 +671,3 @@ out_free_src:
 out:
 	return ret;
 }
-
-void
-nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
-			 struct hmm_range *range)
-{
-	unsigned long i, npages;
-
-	npages = (range->end - range->start) >> PAGE_SHIFT;
-	for (i = 0; i < npages; ++i) {
-		struct page *page;
-		uint64_t addr;
-
-		page = hmm_device_entry_to_page(range, range->pfns[i]);
-		if (page == NULL)
-			continue;
-
-		if (!is_device_private_page(page))
-			continue;
-
-		addr = nouveau_dmem_page_addr(page);
-		range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
-		range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
-		range->pfns[i] |= NVIF_VMM_PFNMAP_V0_VRAM;
-	}
-}
@@ -37,9 +37,8 @@ int nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
 			     struct vm_area_struct *vma,
 			     unsigned long start,
 			     unsigned long end);
+unsigned long nouveau_dmem_page_addr(struct page *page);

-void nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
-			      struct hmm_range *range);
 #else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */
 static inline void nouveau_dmem_init(struct nouveau_drm *drm) {}
 static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {}
@@ -369,19 +369,6 @@ out_free:
 	return ret;
 }

-static const u64
-nouveau_svm_pfn_flags[HMM_PFN_FLAG_MAX] = {
-	[HMM_PFN_VALID ] = NVIF_VMM_PFNMAP_V0_V,
-	[HMM_PFN_WRITE ] = NVIF_VMM_PFNMAP_V0_W,
-};
-
-static const u64
-nouveau_svm_pfn_values[HMM_PFN_VALUE_MAX] = {
-	[HMM_PFN_ERROR  ] = ~NVIF_VMM_PFNMAP_V0_V,
-	[HMM_PFN_NONE   ] = NVIF_VMM_PFNMAP_V0_NONE,
-	[HMM_PFN_SPECIAL] = ~NVIF_VMM_PFNMAP_V0_V,
-};
-
 /* Issue fault replay for GPU to retry accesses that faulted previously. */
 static void
 nouveau_svm_fault_replay(struct nouveau_svm *svm)
@@ -519,9 +506,45 @@ static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
 	.invalidate = nouveau_svm_range_invalidate,
 };

+static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
+				    struct hmm_range *range, u64 *ioctl_addr)
+{
+	unsigned long i, npages;
+
+	/*
+	 * The ioctl_addr prepared here is passed through nvif_object_ioctl()
+	 * to an eventual DMA map in something like gp100_vmm_pgt_pfn()
+	 *
+	 * This is all just encoding the internal hmm representation into a
+	 * different nouveau internal representation.
+	 */
+	npages = (range->end - range->start) >> PAGE_SHIFT;
+	for (i = 0; i < npages; ++i) {
+		struct page *page;
+
+		if (!(range->hmm_pfns[i] & HMM_PFN_VALID)) {
+			ioctl_addr[i] = 0;
+			continue;
+		}
+
+		page = hmm_pfn_to_page(range->hmm_pfns[i]);
+		if (is_device_private_page(page))
+			ioctl_addr[i] = nouveau_dmem_page_addr(page) |
+					NVIF_VMM_PFNMAP_V0_V |
+					NVIF_VMM_PFNMAP_V0_VRAM;
+		else
+			ioctl_addr[i] = page_to_phys(page) |
+					NVIF_VMM_PFNMAP_V0_V |
+					NVIF_VMM_PFNMAP_V0_HOST;
+		if (range->hmm_pfns[i] & HMM_PFN_WRITE)
+			ioctl_addr[i] |= NVIF_VMM_PFNMAP_V0_W;
+	}
+}
+
 static int nouveau_range_fault(struct nouveau_svmm *svmm,
 			       struct nouveau_drm *drm, void *data, u32 size,
-			       u64 *pfns, struct svm_notifier *notifier)
+			       unsigned long hmm_pfns[], u64 *ioctl_addr,
+			       struct svm_notifier *notifier)
 {
 	unsigned long timeout =
 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
@@ -530,26 +553,27 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
 		.notifier = &notifier->notifier,
 		.start = notifier->notifier.interval_tree.start,
 		.end = notifier->notifier.interval_tree.last + 1,
-		.pfns = pfns,
-		.flags = nouveau_svm_pfn_flags,
-		.values = nouveau_svm_pfn_values,
-		.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT,
+		.pfn_flags_mask = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
+		.hmm_pfns = hmm_pfns,
 	};
 	struct mm_struct *mm = notifier->notifier.mm;
-	long ret;
+	int ret;

 	while (true) {
 		if (time_after(jiffies, timeout))
 			return -EBUSY;

 		range.notifier_seq = mmu_interval_read_begin(range.notifier);
-		range.default_flags = 0;
-		range.pfn_flags_mask = -1UL;
 		down_read(&mm->mmap_sem);
 		ret = hmm_range_fault(&range);
 		up_read(&mm->mmap_sem);
-		if (ret <= 0) {
-			if (ret == 0 || ret == -EBUSY)
+		if (ret) {
+			/*
+			 * FIXME: the input PFN_REQ flags are destroyed on
+			 * -EBUSY, we need to regenerate them, also for the
+			 * other continue below
+			 */
+			if (ret == -EBUSY)
 				continue;
 			return ret;
 		}
@@ -563,7 +587,7 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
 			break;
 	}

-	nouveau_dmem_convert_pfn(drm, &range);
+	nouveau_hmm_convert_pfn(drm, &range, ioctl_addr);

 	svmm->vmm->vmm.object.client->super = true;
 	ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL);
@@ -590,6 +614,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
 		} i;
 		u64 phys[16];
 	} args;
+	unsigned long hmm_pfns[ARRAY_SIZE(args.phys)];
 	struct vm_area_struct *vma;
 	u64 inst, start, limit;
 	int fi, fn, pi, fill;
@@ -705,12 +730,17 @@ nouveau_svm_fault(struct nvif_notify *notify)
 			 * access flags.
 			 *XXX: atomic?
 			 */
-			if (buffer->fault[fn]->access != 0 /* READ. */ &&
-			    buffer->fault[fn]->access != 3 /* PREFETCH. */) {
-				args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V |
-						  NVIF_VMM_PFNMAP_V0_W;
-			} else {
-				args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V;
+			switch (buffer->fault[fn]->access) {
+			case 0: /* READ. */
+				hmm_pfns[pi++] = HMM_PFN_REQ_FAULT;
+				break;
+			case 3: /* PREFETCH. */
+				hmm_pfns[pi++] = 0;
+				break;
+			default:
+				hmm_pfns[pi++] = HMM_PFN_REQ_FAULT |
+						 HMM_PFN_REQ_WRITE;
+				break;
 			}
 			args.i.p.size = pi << PAGE_SHIFT;

@@ -738,7 +768,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
 			fill = (buffer->fault[fn    ]->addr -
 				buffer->fault[fn - 1]->addr) >> PAGE_SHIFT;
 			while (--fill)
-				args.phys[pi++] = NVIF_VMM_PFNMAP_V0_NONE;
+				hmm_pfns[pi++] = 0;
 		}

 		SVMM_DBG(svmm, "wndw %016llx-%016llx covering %d fault(s)",
|
||||||
ret = nouveau_range_fault(
|
ret = nouveau_range_fault(
|
||||||
svmm, svm->drm, &args,
|
svmm, svm->drm, &args,
|
||||||
sizeof(args.i) + pi * sizeof(args.phys[0]),
|
sizeof(args.i) + pi * sizeof(args.phys[0]),
|
||||||
args.phys, ¬ifier);
|
hmm_pfns, args.phys, ¬ifier);
|
||||||
mmu_interval_notifier_remove(¬ifier.notifier);
|
mmu_interval_notifier_remove(¬ifier.notifier);
|
||||||
}
|
}
|
||||||
mmput(mm);
|
mmput(mm);
|
||||||
|
|
|
@@ -19,51 +19,47 @@
 #include <linux/mmu_notifier.h>

 /*
- * hmm_pfn_flag_e - HMM flag enums
+ * On output:
+ * 0             - The page is faultable and a future call with
+ *                 HMM_PFN_REQ_FAULT could succeed.
+ * HMM_PFN_VALID - the pfn field points to a valid PFN. This PFN is at
+ *                 least readable. If dev_private_owner is !NULL then this could
+ *                 point at a DEVICE_PRIVATE page.
+ * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID)
+ * HMM_PFN_ERROR - accessing the pfn is impossible and the device should
+ *                 fail. ie poisoned memory, special pages, no vma, etc
  *
- * Flags:
- * HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
- * HMM_PFN_WRITE: CPU page table has write permission set
- *
- * The driver provides a flags array for mapping page protections to device
- * PTE bits. If the driver valid bit for an entry is bit 3,
- * i.e., (entry & (1 << 3)), then the driver must provide
- * an array in hmm_range.flags with hmm_range.flags[HMM_PFN_VALID] == 1 << 3.
- * Same logic apply to all flags. This is the same idea as vm_page_prot in vma
- * except that this is per device driver rather than per architecture.
+ * On input:
+ * 0                 - Return the current state of the page, do not fault it.
+ * HMM_PFN_REQ_FAULT - The output must have HMM_PFN_VALID or hmm_range_fault()
+ *                     will fail
+ * HMM_PFN_REQ_WRITE - The output must have HMM_PFN_WRITE or hmm_range_fault()
+ *                     will fail. Must be combined with HMM_PFN_REQ_FAULT.
  */
-enum hmm_pfn_flag_e {
-	HMM_PFN_VALID = 0,
-	HMM_PFN_WRITE,
-	HMM_PFN_FLAG_MAX
+enum hmm_pfn_flags {
+	/* Output flags */
+	HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1),
+	HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2),
+	HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3),
+
+	/* Input flags */
+	HMM_PFN_REQ_FAULT = HMM_PFN_VALID,
+	HMM_PFN_REQ_WRITE = HMM_PFN_WRITE,
+
+	HMM_PFN_FLAGS = HMM_PFN_VALID | HMM_PFN_WRITE | HMM_PFN_ERROR,
 };

 /*
- * hmm_pfn_value_e - HMM pfn special value
+ * hmm_pfn_to_page() - return struct page pointed to by a device entry
  *
- * Flags:
- * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
- * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
- * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
- * result of vmf_insert_pfn() or vm_insert_page(). Therefore, it should not
- * be mirrored by a device, because the entry will never have HMM_PFN_VALID
- * set and the pfn value is undefined.
- *
- * Driver provides values for none entry, error entry, and special entry.
- * Driver can alias (i.e., use same value) error and special, but
- * it should not alias none with error or special.
- *
- * HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be:
- * hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous,
- * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table entry,
- * hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one
+ * This must be called under the caller 'user_lock' after a successful
+ * mmu_interval_read_begin(). The caller must have tested for HMM_PFN_VALID
+ * already.
  */
-enum hmm_pfn_value_e {
-	HMM_PFN_ERROR,
-	HMM_PFN_NONE,
-	HMM_PFN_SPECIAL,
-	HMM_PFN_VALUE_MAX
-};
+static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn)
+{
+	return pfn_to_page(hmm_pfn & ~HMM_PFN_FLAGS);
+}

 /*
  * struct hmm_range - track invalidation lock on virtual address range
|
||||||
* @notifier_seq: result of mmu_interval_read_begin()
|
* @notifier_seq: result of mmu_interval_read_begin()
|
||||||
* @start: range virtual start address (inclusive)
|
* @start: range virtual start address (inclusive)
|
||||||
* @end: range virtual end address (exclusive)
|
* @end: range virtual end address (exclusive)
|
||||||
* @pfns: array of pfns (big enough for the range)
|
* @hmm_pfns: array of pfns (big enough for the range)
|
||||||
* @flags: pfn flags to match device driver page table
|
|
||||||
* @values: pfn value for some special case (none, special, error, ...)
|
|
||||||
* @default_flags: default flags for the range (write, read, ... see hmm doc)
|
* @default_flags: default flags for the range (write, read, ... see hmm doc)
|
||||||
* @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
|
* @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
|
||||||
* @pfn_shift: pfn shift value (should be <= PAGE_SHIFT)
|
|
||||||
* @dev_private_owner: owner of device private pages
|
* @dev_private_owner: owner of device private pages
|
||||||
*/
|
*/
|
||||||
struct hmm_range {
|
struct hmm_range {
|
||||||
|
@@ -85,42 +78,16 @@ struct hmm_range {
 	unsigned long notifier_seq;
 	unsigned long start;
 	unsigned long end;
-	uint64_t *pfns;
-	const uint64_t *flags;
-	const uint64_t *values;
-	uint64_t default_flags;
-	uint64_t pfn_flags_mask;
-	uint8_t pfn_shift;
+	unsigned long *hmm_pfns;
+	unsigned long default_flags;
+	unsigned long pfn_flags_mask;
 	void *dev_private_owner;
 };

 /*
- * hmm_device_entry_to_page() - return struct page pointed to by a device entry
- * @range: range use to decode device entry value
- * @entry: device entry value to get corresponding struct page from
- * Return: struct page pointer if entry is a valid, NULL otherwise
- *
- * If the device entry is valid (ie valid flag set) then return the struct page
- * matching the entry value. Otherwise return NULL.
- */
-static inline struct page *hmm_device_entry_to_page(const struct hmm_range *range,
-						    uint64_t entry)
-{
-	if (entry == range->values[HMM_PFN_NONE])
-		return NULL;
-	if (entry == range->values[HMM_PFN_ERROR])
-		return NULL;
-	if (entry == range->values[HMM_PFN_SPECIAL])
-		return NULL;
-	if (!(entry & range->flags[HMM_PFN_VALID]))
-		return NULL;
-	return pfn_to_page(entry >> range->pfn_shift);
-}
-
-/*
  * Please see Documentation/vm/hmm.rst for how to use the range API.
  */
-long hmm_range_fault(struct hmm_range *range);
+int hmm_range_fault(struct hmm_range *range);

 /*
 * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range
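The new header packs the flag bits into the top of each unsigned long entry
and the page frame number into the low bits, so a consumer tests the flags and
then converts in place. A minimal sketch, assuming only what the header above
defines (the helper name is ours, not part of the header)::

	/* decode one output entry; returns NULL if it cannot be used */
	static struct page *entry_to_page(unsigned long hmm_pfn, bool need_write)
	{
		if (!(hmm_pfn & HMM_PFN_VALID))
			return NULL;	/* not faulted in, or HMM_PFN_ERROR */
		if (need_write && !(hmm_pfn & HMM_PFN_WRITE))
			return NULL;	/* mapped read-only */
		/* hmm_pfn_to_page() masks off HMM_PFN_FLAGS before pfn_to_page() */
		return hmm_pfn_to_page(hmm_pfn);
	}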
@@ -2218,6 +2218,19 @@ config TEST_MEMINIT

 	  If unsure, say N.

+config TEST_HMM
+	tristate "Test HMM (Heterogeneous Memory Management)"
+	depends on TRANSPARENT_HUGEPAGE
+	depends on DEVICE_PRIVATE
+	select HMM_MIRROR
+	select MMU_NOTIFIER
+	help
+	  This is a pseudo device driver solely for testing HMM.
+	  Say M here if you want to build the HMM test module.
+	  Doing so will allow you to run tools/testing/selftest/vm/hmm-tests.
+
+	  If unsure, say N.
+
 endif # RUNTIME_TESTING_MENU

 config MEMTEST
@@ -92,6 +92,7 @@ obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o
 obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o
 obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o
 obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o
+obj-$(CONFIG_TEST_HMM) += test_hmm.o

 obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/
File diff suppressed because it is too large
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This is a module to test the HMM (Heterogeneous Memory Management) API
+ * of the kernel. It allows a userspace program to expose its entire address
+ * space through the HMM test module device file.
+ */
+#ifndef _LIB_TEST_HMM_UAPI_H
+#define _LIB_TEST_HMM_UAPI_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/*
+ * Structure to pass to the HMM test driver to mimic a device accessing
+ * system memory and ZONE_DEVICE private memory through device page tables.
+ *
+ * @addr: (in) user address the device will read/write
+ * @ptr: (in) user address where device data is copied to/from
+ * @npages: (in) number of pages to read/write
+ * @cpages: (out) number of pages copied
+ * @faults: (out) number of device page faults seen
+ */
+struct hmm_dmirror_cmd {
+	__u64		addr;
+	__u64		ptr;
+	__u64		npages;
+	__u64		cpages;
+	__u64		faults;
+};
+
+/* Expose the address space of the calling process through hmm device file */
+#define HMM_DMIRROR_READ		_IOWR('H', 0x00, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_WRITE		_IOWR('H', 0x01, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_MIGRATE		_IOWR('H', 0x02, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_SNAPSHOT		_IOWR('H', 0x03, struct hmm_dmirror_cmd)
+
+/*
+ * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
+ * HMM_DMIRROR_PROT_ERROR: no valid mirror PTE for this page
+ * HMM_DMIRROR_PROT_NONE: unpopulated PTE or PTE with no access
+ * HMM_DMIRROR_PROT_READ: read-only PTE
+ * HMM_DMIRROR_PROT_WRITE: read/write PTE
+ * HMM_DMIRROR_PROT_ZERO: special read-only zero page
+ * HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL: Migrated device private page on the
+ *					device the ioctl() is made
+ * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
+ *					other device
+ */
+enum {
+	HMM_DMIRROR_PROT_ERROR			= 0xFF,
+	HMM_DMIRROR_PROT_NONE			= 0x00,
+	HMM_DMIRROR_PROT_READ			= 0x01,
+	HMM_DMIRROR_PROT_WRITE			= 0x02,
+	HMM_DMIRROR_PROT_ZERO			= 0x10,
+	HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL	= 0x20,
+	HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE	= 0x30,
+};
+
+#endif /* _LIB_TEST_HMM_UAPI_H */
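For orientation, a userspace test opens the character device (created as
/dev/hmm_dmirror0 by the test_hmm.sh script further below), fills a struct
hmm_dmirror_cmd, and issues one of the ioctls above. This is a hedged sketch;
the include path and helper name are assumptions::

	#include <fcntl.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include "test_hmm_uapi.h"	/* assumed include path */

	/* snapshot protections for npages pages starting at addr */
	int dmirror_snapshot(int fd, void *addr, unsigned char *prot,
			     unsigned long npages)
	{
		struct hmm_dmirror_cmd cmd = {
			.addr = (uintptr_t)addr,	/* range the mirror inspects */
			.ptr = (uintptr_t)prot,	/* one HMM_DMIRROR_PROT_* byte per page */
			.npages = npages,
		};
		return ioctl(fd, HMM_DMIRROR_SNAPSHOT, &cmd);
	}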
mm/hmm.c (185 changed lines)
@@ -37,28 +37,13 @@ enum {
 	HMM_NEED_ALL_BITS = HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT,
 };

-/*
- * hmm_device_entry_from_pfn() - create a valid device entry value from pfn
- * @range: range use to encode HMM pfn value
- * @pfn: pfn value for which to create the device entry
- * Return: valid device entry for the pfn
- */
-static uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range,
-					  unsigned long pfn)
-{
-	return (pfn << range->pfn_shift) | range->flags[HMM_PFN_VALID];
-}
-
 static int hmm_pfns_fill(unsigned long addr, unsigned long end,
-			 struct hmm_range *range, enum hmm_pfn_value_e value)
+			 struct hmm_range *range, unsigned long cpu_flags)
 {
-	uint64_t *pfns = range->pfns;
-	unsigned long i;
-
-	i = (addr - range->start) >> PAGE_SHIFT;
+	unsigned long i = (addr - range->start) >> PAGE_SHIFT;
+
 	for (; addr < end; addr += PAGE_SIZE, i++)
-		pfns[i] = range->values[value];
+		range->hmm_pfns[i] = cpu_flags;

 	return 0;
 }
@@ -96,7 +81,8 @@ static int hmm_vma_fault(unsigned long addr, unsigned long end,
 }

 static unsigned int hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
-				       uint64_t pfns, uint64_t cpu_flags)
+				       unsigned long pfn_req_flags,
+				       unsigned long cpu_flags)
 {
 	struct hmm_range *range = hmm_vma_walk->range;

@@ -110,27 +96,28 @@ static unsigned int hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
 	 * waste to have the user pre-fill the pfn arrays with a default
 	 * flags value.
 	 */
-	pfns = (pfns & range->pfn_flags_mask) | range->default_flags;
+	pfn_req_flags &= range->pfn_flags_mask;
+	pfn_req_flags |= range->default_flags;

 	/* We aren't ask to do anything ... */
-	if (!(pfns & range->flags[HMM_PFN_VALID]))
+	if (!(pfn_req_flags & HMM_PFN_REQ_FAULT))
 		return 0;

 	/* Need to write fault ? */
-	if ((pfns & range->flags[HMM_PFN_WRITE]) &&
-	    !(cpu_flags & range->flags[HMM_PFN_WRITE]))
+	if ((pfn_req_flags & HMM_PFN_REQ_WRITE) &&
+	    !(cpu_flags & HMM_PFN_WRITE))
 		return HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT;

 	/* If CPU page table is not valid then we need to fault */
-	if (!(cpu_flags & range->flags[HMM_PFN_VALID]))
+	if (!(cpu_flags & HMM_PFN_VALID))
 		return HMM_NEED_FAULT;
 	return 0;
 }

 static unsigned int
 hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
-		     const uint64_t *pfns, unsigned long npages,
-		     uint64_t cpu_flags)
+		     const unsigned long hmm_pfns[], unsigned long npages,
+		     unsigned long cpu_flags)
 {
 	struct hmm_range *range = hmm_vma_walk->range;
 	unsigned int required_fault = 0;
@@ -142,12 +129,12 @@ hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
 	 * hmm_pte_need_fault() will always return 0.
 	 */
 	if (!((range->default_flags | range->pfn_flags_mask) &
-	      range->flags[HMM_PFN_VALID]))
+	      HMM_PFN_REQ_FAULT))
 		return 0;

 	for (i = 0; i < npages; ++i) {
-		required_fault |=
-			hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags);
+		required_fault |= hmm_pte_need_fault(hmm_vma_walk, hmm_pfns[i],
+						     cpu_flags);
 		if (required_fault == HMM_NEED_ALL_BITS)
 			return required_fault;
 	}
@@ -161,12 +148,13 @@ static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
 	struct hmm_range *range = hmm_vma_walk->range;
 	unsigned int required_fault;
 	unsigned long i, npages;
-	uint64_t *pfns;
+	unsigned long *hmm_pfns;

 	i = (addr - range->start) >> PAGE_SHIFT;
 	npages = (end - addr) >> PAGE_SHIFT;
-	pfns = &range->pfns[i];
-	required_fault = hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0);
+	hmm_pfns = &range->hmm_pfns[i];
+	required_fault =
+		hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0);
 	if (!walk->vma) {
 		if (required_fault)
 			return -EFAULT;
@@ -174,46 +162,44 @@ static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
 	}
 	if (required_fault)
 		return hmm_vma_fault(addr, end, required_fault, walk);
-	hmm_vma_walk->last = addr;
-	return hmm_pfns_fill(addr, end, range, HMM_PFN_NONE);
+	return hmm_pfns_fill(addr, end, range, 0);
 }

-static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
+static inline unsigned long pmd_to_hmm_pfn_flags(struct hmm_range *range,
+						 pmd_t pmd)
 {
 	if (pmd_protnone(pmd))
 		return 0;
-	return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] |
-				range->flags[HMM_PFN_WRITE] :
-				range->flags[HMM_PFN_VALID];
+	return pmd_write(pmd) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
 }

 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
-			      unsigned long end, uint64_t *pfns, pmd_t pmd)
+			      unsigned long end, unsigned long hmm_pfns[],
+			      pmd_t pmd)
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
 	unsigned long pfn, npages, i;
 	unsigned int required_fault;
-	uint64_t cpu_flags;
+	unsigned long cpu_flags;

 	npages = (end - addr) >> PAGE_SHIFT;
 	cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
 	required_fault =
-		hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags);
+		hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, cpu_flags);
 	if (required_fault)
 		return hmm_vma_fault(addr, end, required_fault, walk);

 	pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
 	for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
-		pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
-	hmm_vma_walk->last = end;
+		hmm_pfns[i] = pfn | cpu_flags;
 	return 0;
 }
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 /* stub to allow the code below to compile */
 int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
-		unsigned long end, uint64_t *pfns, pmd_t pmd);
+		unsigned long end, unsigned long hmm_pfns[], pmd_t pmd);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */

 static inline bool hmm_is_device_private_entry(struct hmm_range *range,
@@ -224,31 +210,31 @@ static inline bool hmm_is_device_private_entry(struct hmm_range *range,
 		range->dev_private_owner;
 }

-static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
+static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
+						 pte_t pte)
 {
 	if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte))
 		return 0;
-	return pte_write(pte) ? range->flags[HMM_PFN_VALID] |
-				range->flags[HMM_PFN_WRITE] :
-				range->flags[HMM_PFN_VALID];
+	return pte_write(pte) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
 }

 static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 			      unsigned long end, pmd_t *pmdp, pte_t *ptep,
-			      uint64_t *pfn)
+			      unsigned long *hmm_pfn)
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
 	unsigned int required_fault;
-	uint64_t cpu_flags;
+	unsigned long cpu_flags;
 	pte_t pte = *ptep;
-	uint64_t orig_pfn = *pfn;
+	uint64_t pfn_req_flags = *hmm_pfn;

 	if (pte_none(pte)) {
-		required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0);
+		required_fault =
+			hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
 		if (required_fault)
 			goto fault;
-		*pfn = range->values[HMM_PFN_NONE];
+		*hmm_pfn = 0;
 		return 0;
 	}

@@ -260,17 +246,18 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 	 * the PFN even if not present.
 	 */
 	if (hmm_is_device_private_entry(range, entry)) {
-		*pfn = hmm_device_entry_from_pfn(range,
-				    device_private_entry_to_pfn(entry));
-		*pfn |= range->flags[HMM_PFN_VALID];
+		cpu_flags = HMM_PFN_VALID;
 		if (is_write_device_private_entry(entry))
-			*pfn |= range->flags[HMM_PFN_WRITE];
+			cpu_flags |= HMM_PFN_WRITE;
+		*hmm_pfn = device_private_entry_to_pfn(entry) |
+				cpu_flags;
 		return 0;
 	}

-	required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0);
+	required_fault =
+		hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
 	if (!required_fault) {
-		*pfn = range->values[HMM_PFN_NONE];
+		*hmm_pfn = 0;
 		return 0;
 	}

@@ -290,7 +277,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 	}

 	cpu_flags = pte_to_hmm_pfn_flags(range, pte);
-	required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags);
+	required_fault =
+		hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags);
 	if (required_fault)
 		goto fault;

@@ -299,15 +287,15 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 	 * fall through and treat it like a normal page.
 	 */
 	if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) {
-		if (hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0)) {
+		if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
 			pte_unmap(ptep);
 			return -EFAULT;
 		}
-		*pfn = range->values[HMM_PFN_SPECIAL];
+		*hmm_pfn = HMM_PFN_ERROR;
 		return 0;
 	}

-	*pfn = hmm_device_entry_from_pfn(range, pte_pfn(pte)) | cpu_flags;
+	*hmm_pfn = pte_pfn(pte) | cpu_flags;
 	return 0;

 fault:
@@ -323,7 +311,8 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
-	uint64_t *pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT];
+	unsigned long *hmm_pfns =
+		&range->hmm_pfns[(start - range->start) >> PAGE_SHIFT];
 	unsigned long npages = (end - start) >> PAGE_SHIFT;
 	unsigned long addr = start;
 	pte_t *ptep;
@@ -335,16 +324,16 @@ again:
 		return hmm_vma_walk_hole(start, end, -1, walk);

 	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
-		if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0)) {
+		if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0)) {
 			hmm_vma_walk->last = addr;
 			pmd_migration_entry_wait(walk->mm, pmdp);
 			return -EBUSY;
 		}
-		return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
+		return hmm_pfns_fill(start, end, range, 0);
 	}

 	if (!pmd_present(pmd)) {
-		if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0))
+		if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0))
 			return -EFAULT;
 		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
 	}
|
||||||
if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
|
if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
|
||||||
goto again;
|
goto again;
|
||||||
|
|
||||||
return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd);
|
return hmm_vma_handle_pmd(walk, addr, end, hmm_pfns, pmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -374,37 +363,33 @@ again:
|
||||||
* recover.
|
* recover.
|
||||||
*/
|
*/
|
||||||
if (pmd_bad(pmd)) {
|
if (pmd_bad(pmd)) {
|
||||||
if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0))
|
if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
|
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
ptep = pte_offset_map(pmdp, addr);
|
ptep = pte_offset_map(pmdp, addr);
|
||||||
for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) {
|
for (; addr < end; addr += PAGE_SIZE, ptep++, hmm_pfns++) {
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns);
|
r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, hmm_pfns);
|
||||||
if (r) {
|
if (r) {
|
||||||
/* hmm_vma_handle_pte() did pte_unmap() */
|
/* hmm_vma_handle_pte() did pte_unmap() */
|
||||||
hmm_vma_walk->last = addr;
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pte_unmap(ptep - 1);
|
pte_unmap(ptep - 1);
|
||||||
|
|
||||||
hmm_vma_walk->last = addr;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \
|
#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \
|
||||||
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
|
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
|
||||||
static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud)
|
static inline unsigned long pud_to_hmm_pfn_flags(struct hmm_range *range,
|
||||||
|
pud_t pud)
|
||||||
{
|
{
|
||||||
if (!pud_present(pud))
|
if (!pud_present(pud))
|
||||||
return 0;
|
return 0;
|
||||||
return pud_write(pud) ? range->flags[HMM_PFN_VALID] |
|
return pud_write(pud) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
|
||||||
range->flags[HMM_PFN_WRITE] :
|
|
||||||
range->flags[HMM_PFN_VALID];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
|
static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
|
||||||
|
@ -432,7 +417,8 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
|
||||||
if (pud_huge(pud) && pud_devmap(pud)) {
|
if (pud_huge(pud) && pud_devmap(pud)) {
|
||||||
unsigned long i, npages, pfn;
|
unsigned long i, npages, pfn;
|
||||||
unsigned int required_fault;
|
unsigned int required_fault;
|
||||||
uint64_t *pfns, cpu_flags;
|
unsigned long *hmm_pfns;
|
||||||
|
unsigned long cpu_flags;
|
||||||
|
|
||||||
if (!pud_present(pud)) {
|
if (!pud_present(pud)) {
|
||||||
spin_unlock(ptl);
|
spin_unlock(ptl);
|
||||||
|
@ -441,10 +427,10 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
|
||||||
|
|
||||||
i = (addr - range->start) >> PAGE_SHIFT;
|
i = (addr - range->start) >> PAGE_SHIFT;
|
||||||
npages = (end - addr) >> PAGE_SHIFT;
|
npages = (end - addr) >> PAGE_SHIFT;
|
||||||
pfns = &range->pfns[i];
|
hmm_pfns = &range->hmm_pfns[i];
|
||||||
|
|
||||||
cpu_flags = pud_to_hmm_pfn_flags(range, pud);
|
cpu_flags = pud_to_hmm_pfn_flags(range, pud);
|
||||||
required_fault = hmm_range_need_fault(hmm_vma_walk, pfns,
|
required_fault = hmm_range_need_fault(hmm_vma_walk, hmm_pfns,
|
||||||
npages, cpu_flags);
|
npages, cpu_flags);
|
||||||
if (required_fault) {
|
if (required_fault) {
|
||||||
spin_unlock(ptl);
|
spin_unlock(ptl);
|
||||||
|
@@ -453,9 +439,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,

 		pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
 		for (i = 0; i < npages; ++i, ++pfn)
-			pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
-				  cpu_flags;
-		hmm_vma_walk->last = end;
+			hmm_pfns[i] = pfn | cpu_flags;
 		goto out_unlock;
 	}

@@ -479,8 +463,9 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
 	struct vm_area_struct *vma = walk->vma;
-	uint64_t orig_pfn, cpu_flags;
 	unsigned int required_fault;
+	unsigned long pfn_req_flags;
+	unsigned long cpu_flags;
 	spinlock_t *ptl;
 	pte_t entry;

@@ -488,9 +473,10 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 	entry = huge_ptep_get(pte);

 	i = (start - range->start) >> PAGE_SHIFT;
-	orig_pfn = range->pfns[i];
+	pfn_req_flags = range->hmm_pfns[i];
 	cpu_flags = pte_to_hmm_pfn_flags(range, entry);
-	required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags);
+	required_fault =
+		hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags);
 	if (required_fault) {
 		spin_unlock(ptl);
 		return hmm_vma_fault(addr, end, required_fault, walk);
@@ -498,9 +484,8 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,

 	pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
 	for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
-		range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
-				 cpu_flags;
-	hmm_vma_walk->last = end;
+		range->hmm_pfns[i] = pfn | cpu_flags;
 	spin_unlock(ptl);
 	return 0;
 }
@@ -531,13 +516,12 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end,
 	 * failure.
 	 */
 	if (hmm_range_need_fault(hmm_vma_walk,
-				 range->pfns +
+				 range->hmm_pfns +
 				 ((start - range->start) >> PAGE_SHIFT),
 				 (end - start) >> PAGE_SHIFT, 0))
 		return -EFAULT;

 	hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
-	hmm_vma_walk->last = end;

 	/* Skip this vma and continue processing the next vma. */
 	return 1;
@@ -555,9 +539,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
 * hmm_range_fault - try to fault some address in a virtual address range
 * @range: argument structure
 *
- * Return: the number of valid pages in range->pfns[] (from range start
- * address), which may be zero. On error one of the following status codes
- * can be returned:
+ * Returns 0 on success or one of the following error codes:
 *
 * -EINVAL: Invalid arguments or mm or virtual address is in an invalid vma
 *	(e.g., device file vma).
@@ -572,7 +554,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
 * This is similar to get_user_pages(), except that it can read the page tables
 * without mutating them (ie causing faults).
 */
-long hmm_range_fault(struct hmm_range *range)
+int hmm_range_fault(struct hmm_range *range)
 {
 	struct hmm_vma_walk hmm_vma_walk = {
 		.range = range,
@@ -590,10 +572,13 @@ long hmm_range_fault(struct hmm_range *range)
 			return -EBUSY;
 		ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
 				      &hmm_walk_ops, &hmm_vma_walk);
+		/*
+		 * When -EBUSY is returned the loop restarts with
+		 * hmm_vma_walk.last set to an address that has not been stored
+		 * in pfns. All entries < last in the pfn array are set to their
+		 * output, and all >= are still at their input values.
+		 */
 	} while (ret == -EBUSY);
-
-	if (ret)
-		return ret;
-	return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
+	return ret;
 }
 EXPORT_SYMBOL(hmm_range_fault);
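The caller contract after this hunk is all-or-nothing: hmm_range_fault()
returns 0 with every entry in range->hmm_pfns[] holding its output value, or a
negative errno with no partial result to consume, and -EBUSY simply means the
walk must restart from a fresh mmu_interval_read_begin(). A sketch of the
resulting caller loop, assuming a range and mm set up as in the drivers above::

	do {
		range.notifier_seq = mmu_interval_read_begin(range.notifier);
		down_read(&mm->mmap_sem);
		ret = hmm_range_fault(&range);
		up_read(&mm->mmap_sem);
		/* no partial page counts left to handle */
	} while (ret == -EBUSY);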
@@ -17,3 +17,4 @@ gup_benchmark
 va_128TBswitch
 map_fixed_noreplace
 write_to_hugetlbfs
+hmm-tests
@@ -7,6 +7,7 @@ CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
 LDLIBS = -lrt
 TEST_GEN_FILES = compaction_test
 TEST_GEN_FILES += gup_benchmark
+TEST_GEN_FILES += hmm-tests
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-shm
 TEST_GEN_FILES += map_hugetlb
@@ -33,6 +34,8 @@ TEST_FILES := test_vmalloc.sh
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk

+$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread
+
 $(OUTPUT)/userfaultfd: LDLIBS += -lpthread

 $(OUTPUT)/mlock-random-test: LDLIBS += -lcap
@@ -1,3 +1,5 @@
 CONFIG_SYSVIPC=y
 CONFIG_USERFAULTFD=y
 CONFIG_TEST_VMALLOC=m
+CONFIG_DEVICE_PRIVATE=y
+CONFIG_TEST_HMM=m
File diff suppressed because it is too large
@@ -307,4 +307,20 @@ else
 	echo "[FAIL]"
 	exitcode=1
 fi

+echo "running HMM smoke test"
+echo "------------------------------------"
+./test_hmm.sh smoke
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	echo "[SKIP]"
+	exitcode=$ksft_skip
+else
+	echo "[FAIL]"
+	exitcode=1
+fi
+
 exit $exitcode
@@ -0,0 +1,97 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
+#
+# This is a test script for the kernel test driver to analyse vmalloc
+# allocator. Therefore it is just a kernel module loader. You can specify
+# and pass different parameters in order to:
+#     a) analyse performance of vmalloc allocations;
+#     b) stressing and stability check of vmalloc subsystem.
+
+TEST_NAME="test_hmm"
+DRIVER="test_hmm"
+
+# 1 if fails
+exitcode=1
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_test_requirements()
+{
+	uid=$(id -u)
+	if [ $uid -ne 0 ]; then
+		echo "$0: Must be run as root"
+		exit $ksft_skip
+	fi
+
+	if ! which modprobe > /dev/null 2>&1; then
+		echo "$0: You need modprobe installed"
+		exit $ksft_skip
+	fi
+
+	if ! modinfo $DRIVER > /dev/null 2>&1; then
+		echo "$0: You must have the following enabled in your kernel:"
+		echo "CONFIG_TEST_HMM=m"
+		exit $ksft_skip
+	fi
+}
+
+load_driver()
+{
+	modprobe $DRIVER > /dev/null 2>&1
+	if [ $? == 0 ]; then
+		major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
+		mknod /dev/hmm_dmirror0 c $major 0
+		mknod /dev/hmm_dmirror1 c $major 1
+	fi
+}
+
+unload_driver()
+{
+	modprobe -r $DRIVER > /dev/null 2>&1
+	rm -f /dev/hmm_dmirror?
+}
+
+run_smoke()
+{
+	echo "Running smoke test. Note, this test provides basic coverage."
+
+	load_driver
+	$(dirname "${BASH_SOURCE[0]}")/hmm-tests
+	unload_driver
+}
+
+usage()
+{
+	echo -n "Usage: $0"
+	echo
+	echo "Example usage:"
+	echo
+	echo "# Shows help message"
+	echo "./${TEST_NAME}.sh"
+	echo
+	echo "# Smoke testing"
+	echo "./${TEST_NAME}.sh smoke"
+	echo
+	exit 0
+}
+
+function run_test()
+{
+	if [ $# -eq 0 ]; then
+		usage
+	else
+		if [ "$1" = "smoke" ]; then
+			run_smoke
+		else
+			usage
+		fi
+	fi
+}
+
+check_test_requirements
+run_test $@
+
+exit 0
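Taken together, the selftest pieces above are exercised roughly like this
(paths relative to a kernel tree built with CONFIG_TEST_HMM=m; the exact make
invocation is an assumption based on the standard kselftest flow, and the
script requires root)::

	$ make -C tools/testing/selftests/vm
	$ cd tools/testing/selftests/vm
	$ sudo ./test_hmm.sh smoke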