RDMA/odp: Use the common interval tree library instead of generic

ODP keeps userspace VAs in the interval tree, and these always fit into
an unsigned long, so we can use the common code.

This comes at the cost of a 16-byte increase in ib_umem_odp struct size
due to storing the interval tree start/last in addition to the umem
addr/length. However, these values were previously computed on the fly
and are performance critical for the interval lookup, so this seems like
a worthwhile trade-off.

Removes 2k of .text from the kernel.

Link: https://lore.kernel.org/r/20190819111710.18440-2-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
Jason Gunthorpe 2019-08-19 14:16:59 +03:00
parent 27b7fb1ab7
commit 7cc2e18f21
3 changed files with 31 additions and 62 deletions

View File

@ -55,6 +55,7 @@ config INFINIBAND_ON_DEMAND_PAGING
bool "InfiniBand on-demand paging support" bool "InfiniBand on-demand paging support"
depends on INFINIBAND_USER_MEM depends on INFINIBAND_USER_MEM
select MMU_NOTIFIER select MMU_NOTIFIER
select INTERVAL_TREE
default y default y
---help--- ---help---
On demand paging support for the InfiniBand subsystem. On demand paging support for the InfiniBand subsystem.

View File

@ -39,45 +39,13 @@
#include <linux/export.h> #include <linux/export.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/interval_tree_generic.h> #include <linux/interval_tree.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <rdma/ib_verbs.h> #include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h> #include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h> #include <rdma/ib_umem_odp.h>
/*
* The ib_umem list keeps track of memory regions for which the HW
* device request to receive notification when the related memory
* mapping is changed.
*
* ib_umem_lock protects the list.
*/
static u64 node_start(struct umem_odp_node *n)
{
struct ib_umem_odp *umem_odp =
container_of(n, struct ib_umem_odp, interval_tree);
return ib_umem_start(umem_odp);
}
/* Note that the representation of the intervals in the interval tree
* considers the ending point as contained in the interval, while the
* function ib_umem_end returns the first address which is not contained
* in the umem.
*/
static u64 node_last(struct umem_odp_node *n)
{
struct ib_umem_odp *umem_odp =
container_of(n, struct ib_umem_odp, interval_tree);
return ib_umem_end(umem_odp) - 1;
}
INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
node_start, node_last, static, rbt_ib_umem)
static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp) static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
{ {
mutex_lock(&umem_odp->umem_mutex); mutex_lock(&umem_odp->umem_mutex);
@ -205,9 +173,18 @@ static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
down_write(&per_mm->umem_rwsem); down_write(&per_mm->umem_rwsem);
if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) {
rbt_ib_umem_insert(&umem_odp->interval_tree, /*
&per_mm->umem_tree); * Note that the representation of the intervals in the
* interval tree considers the ending point as contained in
* the interval, while the function ib_umem_end returns the
* first address which is not contained in the umem.
*/
umem_odp->interval_tree.start = ib_umem_start(umem_odp);
umem_odp->interval_tree.last = ib_umem_end(umem_odp) - 1;
interval_tree_insert(&umem_odp->interval_tree,
&per_mm->umem_tree);
}
up_write(&per_mm->umem_rwsem); up_write(&per_mm->umem_rwsem);
} }
@ -217,8 +194,8 @@ static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
down_write(&per_mm->umem_rwsem); down_write(&per_mm->umem_rwsem);
if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
rbt_ib_umem_remove(&umem_odp->interval_tree, interval_tree_remove(&umem_odp->interval_tree,
&per_mm->umem_tree); &per_mm->umem_tree);
complete_all(&umem_odp->notifier_completion); complete_all(&umem_odp->notifier_completion);
up_write(&per_mm->umem_rwsem); up_write(&per_mm->umem_rwsem);
@ -761,18 +738,18 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
void *cookie) void *cookie)
{ {
int ret_val = 0; int ret_val = 0;
struct umem_odp_node *node, *next; struct interval_tree_node *node, *next;
struct ib_umem_odp *umem; struct ib_umem_odp *umem;
if (unlikely(start == last)) if (unlikely(start == last))
return ret_val; return ret_val;
for (node = rbt_ib_umem_iter_first(root, start, last - 1); for (node = interval_tree_iter_first(root, start, last - 1);
node; node = next) { node; node = next) {
/* TODO move the blockable decision up to the callback */ /* TODO move the blockable decision up to the callback */
if (!blockable) if (!blockable)
return -EAGAIN; return -EAGAIN;
next = rbt_ib_umem_iter_next(node, start, last - 1); next = interval_tree_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree); umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem, start, last, cookie) || ret_val; ret_val = cb(umem, start, last, cookie) || ret_val;
} }
@ -780,16 +757,3 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
return ret_val; return ret_val;
} }
EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
u64 addr, u64 length)
{
struct umem_odp_node *node;
node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
if (node)
return container_of(node, struct ib_umem_odp, interval_tree);
return NULL;
}
EXPORT_SYMBOL(rbt_ib_umem_lookup);

View File

@ -37,11 +37,6 @@
#include <rdma/ib_verbs.h> #include <rdma/ib_verbs.h>
#include <linux/interval_tree.h> #include <linux/interval_tree.h>
struct umem_odp_node {
u64 __subtree_last;
struct rb_node rb;
};
struct ib_umem_odp { struct ib_umem_odp {
struct ib_umem umem; struct ib_umem umem;
struct ib_ucontext_per_mm *per_mm; struct ib_ucontext_per_mm *per_mm;
@ -72,7 +67,7 @@ struct ib_umem_odp {
int npages; int npages;
/* Tree tracking */ /* Tree tracking */
struct umem_odp_node interval_tree; struct interval_tree_node interval_tree;
struct completion notifier_completion; struct completion notifier_completion;
int dying; int dying;
@ -163,8 +158,17 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
* Find first region intersecting with address range. * Find first region intersecting with address range.
* Return NULL if not found * Return NULL if not found
*/ */
struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, static inline struct ib_umem_odp *
u64 addr, u64 length); rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length)
{
struct interval_tree_node *node;
node = interval_tree_iter_first(root, addr, addr + length - 1);
if (!node)
return NULL;
return container_of(node, struct ib_umem_odp, interval_tree);
}
static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp, static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
unsigned long mmu_seq) unsigned long mmu_seq)