RDMA/mlx5: Split mlx5_ib_update_xlt() into ODP and non-ODP cases
Mixing these together is just a mess; make a dedicated version, mlx5_ib_update_mr_pas(), which directly loads the whole MTT for a non-ODP MR. The split-out version can trivially use a simple loop with rdma_for_each_block(), which allows using the core code to compute the MR pages and avoids seeking in the SGL list after each chunk as the __mlx5_ib_populate_pas() call required.

This significantly speeds up loading large MTTs.

Link: https://lore.kernel.org/r/20201026132314.1336717-5-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 8010d74b99
commit f1eaac37da
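For context, rdma_for_each_block() in the RDMA core walks an umem's scatterlist in DMA blocks of a caller-chosen page size, and rdma_block_iter_dma_address() yields each block's bus address. A minimal sketch of that pattern follows; fill_pas_sketch() is a hypothetical illustration, not code from this patch:

static void fill_pas_sketch(struct ib_umem *umem, unsigned long page_size,
			    __be64 *pas, u64 access_flags)
{
	struct ib_block_iter biter;

	/* One entry per DMA block; 'pas' is assumed to hold
	 * ib_umem_num_dma_blocks(umem, page_size) entries.
	 */
	rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, page_size)
		*pas++ = cpu_to_be64(rdma_block_iter_dma_address(&biter) |
				     access_flags);
}

This is essentially the loop the new mlx5_ib_update_mr_pas() below builds on, minus the chunked UMR posting.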
@@ -91,70 +91,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 	*shift = PAGE_SHIFT + m;
 }
 
-/*
- * Populate the given array with bus addresses from the umem.
- *
- * dev - mlx5_ib device
- * umem - umem to use to fill the pages
- * page_shift - determines the page size used in the resulting array
- * offset - offset into the umem to start from,
- *          only implemented for ODP umems
- * num_pages - total number of pages to fill
- * pas - bus addresses array to fill
- * access_flags - access flags to set on all present pages.
- *		  use enum mlx5_ib_mtt_access_flags for this.
- */
-void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			    int page_shift, size_t offset, size_t num_pages,
-			    __be64 *pas, int access_flags)
-{
-	int shift = page_shift - PAGE_SHIFT;
-	int mask = (1 << shift) - 1;
-	int i, k, idx;
-	u64 cur = 0;
-	u64 base;
-	int len;
-	struct scatterlist *sg;
-	int entry;
-
-	i = 0;
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		len = sg_dma_len(sg) >> PAGE_SHIFT;
-		base = sg_dma_address(sg);
-
-		/* Skip elements below offset */
-		if (i + len < offset << shift) {
-			i += len;
-			continue;
-		}
-
-		/* Skip pages below offset */
-		if (i < offset << shift) {
-			k = (offset << shift) - i;
-			i = offset << shift;
-		} else {
-			k = 0;
-		}
-
-		for (; k < len; k++) {
-			if (!(i & mask)) {
-				cur = base + (k << PAGE_SHIFT);
-				cur |= access_flags;
-				idx = (i >> shift) - offset;
-
-				pas[idx] = cpu_to_be64(cur);
-				mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
-					    i >> shift, be64_to_cpu(pas[idx]));
-			}
-			i++;
-
-			/* Stop after num_pages reached */
-			if (i >> shift >= offset + num_pages)
-				return;
-		}
-	}
-}
-
 /*
  * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
  * filled in the pas array.
@@ -1232,9 +1232,6 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
 void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 			unsigned long max_page_shift,
 			int *shift);
-void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			    int page_shift, size_t offset, size_t num_pages,
-			    __be64 *pas, int access_flags);
 void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
 			  u64 access_flags);
 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
@@ -1116,6 +1116,21 @@ static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
 	mlx5_ib_free_xlt(xlt, sg->length);
 }
 
+static unsigned int xlt_wr_final_send_flags(unsigned int flags)
+{
+	unsigned int res = 0;
+
+	if (flags & MLX5_IB_UPD_XLT_ENABLE)
+		res |= MLX5_IB_SEND_UMR_ENABLE_MR |
+		       MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
+		       MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+	if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
+		res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
+	if (flags & MLX5_IB_UPD_XLT_ADDR)
+		res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+	return res;
+}
+
 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 		       int page_shift, int flags)
 {
@@ -1140,6 +1155,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 	    !umr_can_use_indirect_mkey(dev))
 		return -EPERM;
 
+	if (WARN_ON(!mr->umem->is_odp))
+		return -EINVAL;
+
 	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
 	 * so we need to align the offset and length accordingly
 	 */
@@ -1155,13 +1173,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 	pages_iter = sg.length / desc_size;
 	orig_sg_length = sg.length;
 
-	if (mr->umem->is_odp) {
-		if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
-			struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
-			size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
+	if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
+		struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+		size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
 
-			pages_to_map = min_t(size_t, pages_to_map, max_pages);
-		}
+		pages_to_map = min_t(size_t, pages_to_map, max_pages);
 	}
 
 	wr.page_shift = page_shift;
@@ -1173,36 +1189,14 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 		size_to_map = npages * desc_size;
 		dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
 					DMA_TO_DEVICE);
-		if (mr->umem->is_odp) {
-			mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
-		} else {
-			__mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx,
-					       npages, xlt,
-					       MLX5_IB_MTT_PRESENT);
-			/* Clear padding after the pages
-			 * brought from the umem.
-			 */
-			memset(xlt + size_to_map, 0, sg.length - size_to_map);
-		}
+		mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
 		dma_sync_single_for_device(ddev, sg.addr, sg.length,
 					   DMA_TO_DEVICE);
 
 		sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
 
-		if (pages_mapped + pages_iter >= pages_to_map) {
-			if (flags & MLX5_IB_UPD_XLT_ENABLE)
-				wr.wr.send_flags |=
-					MLX5_IB_SEND_UMR_ENABLE_MR |
-					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
-					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
-			if (flags & MLX5_IB_UPD_XLT_PD ||
-			    flags & MLX5_IB_UPD_XLT_ACCESS)
-				wr.wr.send_flags |=
-					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
-			if (flags & MLX5_IB_UPD_XLT_ADDR)
-				wr.wr.send_flags |=
-					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
-		}
+		if (pages_mapped + pages_iter >= pages_to_map)
+			wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
 
 		wr.offset = idx * desc_size;
 		wr.xlt_size = sg.length;
@@ -1214,6 +1208,69 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 	return err;
 }
 
+/*
+ * Send the DMA list to the HW for a normal MR using UMR.
+ */
+static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+{
+	struct mlx5_ib_dev *dev = mr->dev;
+	struct device *ddev = dev->ib_dev.dev.parent;
+	struct ib_block_iter biter;
+	struct mlx5_mtt *cur_mtt;
+	struct mlx5_umr_wr wr;
+	size_t orig_sg_length;
+	struct mlx5_mtt *mtt;
+	size_t final_size;
+	struct ib_sge sg;
+	int err = 0;
+
+	if (WARN_ON(mr->umem->is_odp))
+		return -EINVAL;
+
+	mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
+				    ib_umem_num_dma_blocks(mr->umem,
+							   1 << mr->page_shift),
+				    sizeof(*mtt), flags);
+	if (!mtt)
+		return -ENOMEM;
+	orig_sg_length = sg.length;
+
+	cur_mtt = mtt;
+	rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap,
+			     BIT(mr->page_shift)) {
+		if (cur_mtt == (void *)mtt + sg.length) {
+			dma_sync_single_for_device(ddev, sg.addr, sg.length,
+						   DMA_TO_DEVICE);
+			err = mlx5_ib_post_send_wait(dev, &wr);
+			if (err)
+				goto err;
+			dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+						DMA_TO_DEVICE);
+			wr.offset += sg.length;
+			cur_mtt = mtt;
+		}
+
+		cur_mtt->ptag =
+			cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+				    MLX5_IB_MTT_PRESENT);
+		cur_mtt++;
+	}
+
+	final_size = (void *)cur_mtt - (void *)mtt;
+	sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
+	memset(cur_mtt, 0, sg.length - final_size);
+	wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
+	wr.xlt_size = sg.length;
+
+	dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
+	err = mlx5_ib_post_send_wait(dev, &wr);
+
+err:
+	sg.length = orig_sg_length;
+	mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
+	return err;
+}
+
 /*
  * If ibmr is NULL it will be allocated by reg_create.
  * Else, the given ibmr will be used.
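The structure of mlx5_ib_update_mr_pas() above is a bounce-buffer loop: MTT entries accumulate in a fixed DMA-mapped buffer, a UMR work request is posted each time it fills, and the final partial chunk is zero-padded up to MLX5_UMR_MTT_ALIGNMENT and posted once more. A stripped-down model of just that control flow, with load_entries() and post_chunk() as hypothetical stand-ins (post_chunk() plays the role of the dma_sync + mlx5_ib_post_send_wait() step; the real code also shrinks the last post to the aligned size rather than reposting the whole buffer):

static int load_entries(__be64 *buf, size_t buf_entries, const u64 *addr,
			size_t count, int (*post_chunk)(__be64 *, size_t))
{
	__be64 *cur = buf;
	size_t i;
	int err;

	for (i = 0; i < count; i++) {
		if (cur == buf + buf_entries) {	/* buffer full: post it */
			err = post_chunk(buf, buf_entries);
			if (err)
				return err;
			cur = buf;
		}
		*cur++ = cpu_to_be64(addr[i]);
	}

	/* zero the tail of the last chunk, then post it */
	memset(cur, 0, (buf + buf_entries - cur) * sizeof(*cur));
	return post_chunk(buf, buf_entries);
}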
@@ -1480,12 +1537,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		 * configured properly but left disabled. It is safe to go ahead
 		 * and configure it again via UMR while enabling it.
 		 */
-		int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
-
-		err = mlx5_ib_update_xlt(
-			mr, 0,
-			ib_umem_num_dma_blocks(umem, 1UL << mr->page_shift),
-			mr->page_shift, update_xlt_flags);
+		err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
 		if (err) {
 			dereg_mr(dev, mr);
 			return ERR_PTR(err);
@@ -1651,11 +1703,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 			upd_flags |= MLX5_IB_UPD_XLT_PD;
 		if (flags & IB_MR_REREG_ACCESS)
 			upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
-		err = mlx5_ib_update_xlt(
-			mr, 0,
-			ib_umem_num_dma_blocks(mr->umem,
-					       1UL << mr->page_shift),
-			mr->page_shift, upd_flags);
+		err = mlx5_ib_update_mr_pas(mr, upd_flags);
 	} else {
 		err = rereg_umr(pd, mr, access_flags, flags);
 	}