IB/umem: Add contiguous ODP support
Currently ODP supports only regular MMU pages. Add ODP support for regions consisting of physically contiguous chunks of arbitrary order (huge pages, for instance) to improve performance. Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
parent
4df4a5bac3
commit
403cd12e2c
|
@ -504,7 +504,6 @@ out:
|
||||||
static int ib_umem_odp_map_dma_single_page(
|
static int ib_umem_odp_map_dma_single_page(
|
||||||
struct ib_umem *umem,
|
struct ib_umem *umem,
|
||||||
int page_index,
|
int page_index,
|
||||||
u64 base_virt_addr,
|
|
||||||
struct page *page,
|
struct page *page,
|
||||||
u64 access_mask,
|
u64 access_mask,
|
||||||
unsigned long current_seq)
|
unsigned long current_seq)
|
||||||
|
@ -527,7 +526,7 @@ static int ib_umem_odp_map_dma_single_page(
|
||||||
if (!(umem->odp_data->dma_list[page_index])) {
|
if (!(umem->odp_data->dma_list[page_index])) {
|
||||||
dma_addr = ib_dma_map_page(dev,
|
dma_addr = ib_dma_map_page(dev,
|
||||||
page,
|
page,
|
||||||
0, PAGE_SIZE,
|
0, BIT(umem->page_shift),
|
||||||
DMA_BIDIRECTIONAL);
|
DMA_BIDIRECTIONAL);
|
||||||
if (ib_dma_mapping_error(dev, dma_addr)) {
|
if (ib_dma_mapping_error(dev, dma_addr)) {
|
||||||
ret = -EFAULT;
|
ret = -EFAULT;
|
||||||
|
@ -555,8 +554,9 @@ out:
|
||||||
if (remove_existing_mapping && umem->context->invalidate_range) {
|
if (remove_existing_mapping && umem->context->invalidate_range) {
|
||||||
invalidate_page_trampoline(
|
invalidate_page_trampoline(
|
||||||
umem,
|
umem,
|
||||||
base_virt_addr + (page_index * PAGE_SIZE),
|
ib_umem_start(umem) + (page_index >> umem->page_shift),
|
||||||
base_virt_addr + ((page_index+1)*PAGE_SIZE),
|
ib_umem_start(umem) + ((page_index + 1) >>
|
||||||
|
umem->page_shift),
|
||||||
NULL);
|
NULL);
|
||||||
ret = -EAGAIN;
|
ret = -EAGAIN;
|
||||||
}
|
}
|
||||||
|
@ -595,10 +595,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
|
||||||
struct task_struct *owning_process = NULL;
|
struct task_struct *owning_process = NULL;
|
||||||
struct mm_struct *owning_mm = NULL;
|
struct mm_struct *owning_mm = NULL;
|
||||||
struct page **local_page_list = NULL;
|
struct page **local_page_list = NULL;
|
||||||
u64 off;
|
u64 page_mask, off;
|
||||||
int j, k, ret = 0, start_idx, npages = 0;
|
int j, k, ret = 0, start_idx, npages = 0, page_shift;
|
||||||
u64 base_virt_addr;
|
|
||||||
unsigned int flags = 0;
|
unsigned int flags = 0;
|
||||||
|
phys_addr_t p = 0;
|
||||||
|
|
||||||
if (access_mask == 0)
|
if (access_mask == 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -611,9 +611,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
|
||||||
if (!local_page_list)
|
if (!local_page_list)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
off = user_virt & (~PAGE_MASK);
|
page_shift = umem->page_shift;
|
||||||
user_virt = user_virt & PAGE_MASK;
|
page_mask = ~(BIT(page_shift) - 1);
|
||||||
base_virt_addr = user_virt;
|
off = user_virt & (~page_mask);
|
||||||
|
user_virt = user_virt & page_mask;
|
||||||
bcnt += off; /* Charge for the first page offset as well. */
|
bcnt += off; /* Charge for the first page offset as well. */
|
||||||
|
|
||||||
owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
|
owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
|
||||||
|
@ -631,13 +632,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
|
||||||
if (access_mask & ODP_WRITE_ALLOWED_BIT)
|
if (access_mask & ODP_WRITE_ALLOWED_BIT)
|
||||||
flags |= FOLL_WRITE;
|
flags |= FOLL_WRITE;
|
||||||
|
|
||||||
start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
|
start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
|
||||||
k = start_idx;
|
k = start_idx;
|
||||||
|
|
||||||
while (bcnt > 0) {
|
while (bcnt > 0) {
|
||||||
const size_t gup_num_pages =
|
const size_t gup_num_pages = min_t(size_t,
|
||||||
min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
|
(bcnt + BIT(page_shift) - 1) >> page_shift,
|
||||||
PAGE_SIZE / sizeof(struct page *));
|
PAGE_SIZE / sizeof(struct page *));
|
||||||
|
|
||||||
down_read(&owning_mm->mmap_sem);
|
down_read(&owning_mm->mmap_sem);
|
||||||
/*
|
/*
|
||||||
|
@ -656,14 +657,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
|
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
|
||||||
user_virt += npages << PAGE_SHIFT;
|
|
||||||
mutex_lock(&umem->odp_data->umem_mutex);
|
mutex_lock(&umem->odp_data->umem_mutex);
|
||||||
for (j = 0; j < npages; ++j) {
|
for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
|
||||||
|
if (user_virt & ~page_mask) {
|
||||||
|
p += PAGE_SIZE;
|
||||||
|
if (page_to_phys(local_page_list[j]) != p) {
|
||||||
|
ret = -EFAULT;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
put_page(local_page_list[j]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
ret = ib_umem_odp_map_dma_single_page(
|
ret = ib_umem_odp_map_dma_single_page(
|
||||||
umem, k, base_virt_addr, local_page_list[j],
|
umem, k, local_page_list[j],
|
||||||
access_mask, current_seq);
|
access_mask, current_seq);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
p = page_to_phys(local_page_list[j]);
|
||||||
k++;
|
k++;
|
||||||
}
|
}
|
||||||
mutex_unlock(&umem->odp_data->umem_mutex);
|
mutex_unlock(&umem->odp_data->umem_mutex);
|
||||||
|
@ -708,7 +720,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
|
||||||
* once. */
|
* once. */
|
||||||
mutex_lock(&umem->odp_data->umem_mutex);
|
mutex_lock(&umem->odp_data->umem_mutex);
|
||||||
for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
|
for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
|
||||||
idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
|
idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
|
||||||
if (umem->odp_data->page_list[idx]) {
|
if (umem->odp_data->page_list[idx]) {
|
||||||
struct page *page = umem->odp_data->page_list[idx];
|
struct page *page = umem->odp_data->page_list[idx];
|
||||||
dma_addr_t dma = umem->odp_data->dma_list[idx];
|
dma_addr_t dma = umem->odp_data->dma_list[idx];
|
||||||
|
|
|
@ -72,12 +72,12 @@ static inline unsigned long ib_umem_start(struct ib_umem *umem)
|
||||||
/* Returns the address of the page after the last one of an ODP umem. */
|
/* Returns the address of the page after the last one of an ODP umem. */
|
||||||
static inline unsigned long ib_umem_end(struct ib_umem *umem)
|
static inline unsigned long ib_umem_end(struct ib_umem *umem)
|
||||||
{
|
{
|
||||||
return PAGE_ALIGN(umem->address + umem->length);
|
return ALIGN(umem->address + umem->length, BIT(umem->page_shift));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline size_t ib_umem_num_pages(struct ib_umem *umem)
|
static inline size_t ib_umem_num_pages(struct ib_umem *umem)
|
||||||
{
|
{
|
||||||
return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
|
return (ib_umem_end(umem) - ib_umem_start(umem)) >> umem->page_shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_INFINIBAND_USER_MEM
|
#ifdef CONFIG_INFINIBAND_USER_MEM
|
||||||
|
|
Loading…
Reference in New Issue