RDMA/mlx5: Split mlx5_ib_update_xlt() into ODP and non-ODP cases
Mixing these together is just a mess, make a dedicated version, mlx5_ib_update_mr_pas(), which directly loads the whole MTT for a non-ODP MR. The split out version can trivially use a simple loop with rdma_for_each_block() which allows using the core code to compute the MR pages and avoids seeking in the SGL list after each chunk as the __mlx5_ib_populate_pas() call required. Significantly speeds loading large MTTs. Link: https://lore.kernel.org/r/20201026132314.1336717-5-leon@kernel.org Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
parent
8010d74b99
commit
f1eaac37da
|
@ -91,70 +91,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
|
|||
*shift = PAGE_SHIFT + m;
|
||||
}
|
||||
|
||||
/*
|
||||
* Populate the given array with bus addresses from the umem.
|
||||
*
|
||||
* dev - mlx5_ib device
|
||||
* umem - umem to use to fill the pages
|
||||
* page_shift - determines the page size used in the resulting array
|
||||
* offset - offset into the umem to start from,
|
||||
* only implemented for ODP umems
|
||||
* num_pages - total number of pages to fill
|
||||
* pas - bus addresses array to fill
|
||||
* access_flags - access flags to set on all present pages.
|
||||
use enum mlx5_ib_mtt_access_flags for this.
|
||||
*/
|
||||
void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
|
||||
int page_shift, size_t offset, size_t num_pages,
|
||||
__be64 *pas, int access_flags)
|
||||
{
|
||||
int shift = page_shift - PAGE_SHIFT;
|
||||
int mask = (1 << shift) - 1;
|
||||
int i, k, idx;
|
||||
u64 cur = 0;
|
||||
u64 base;
|
||||
int len;
|
||||
struct scatterlist *sg;
|
||||
int entry;
|
||||
|
||||
i = 0;
|
||||
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
|
||||
len = sg_dma_len(sg) >> PAGE_SHIFT;
|
||||
base = sg_dma_address(sg);
|
||||
|
||||
/* Skip elements below offset */
|
||||
if (i + len < offset << shift) {
|
||||
i += len;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Skip pages below offset */
|
||||
if (i < offset << shift) {
|
||||
k = (offset << shift) - i;
|
||||
i = offset << shift;
|
||||
} else {
|
||||
k = 0;
|
||||
}
|
||||
|
||||
for (; k < len; k++) {
|
||||
if (!(i & mask)) {
|
||||
cur = base + (k << PAGE_SHIFT);
|
||||
cur |= access_flags;
|
||||
idx = (i >> shift) - offset;
|
||||
|
||||
pas[idx] = cpu_to_be64(cur);
|
||||
mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
|
||||
i >> shift, be64_to_cpu(pas[idx]));
|
||||
}
|
||||
i++;
|
||||
|
||||
/* Stop after num_pages reached */
|
||||
if (i >> shift >= offset + num_pages)
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
|
||||
* filled in the pas array.
|
||||
|
|
|
@ -1232,9 +1232,6 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
|
|||
void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
|
||||
unsigned long max_page_shift,
|
||||
int *shift);
|
||||
void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
|
||||
int page_shift, size_t offset, size_t num_pages,
|
||||
__be64 *pas, int access_flags);
|
||||
void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
|
||||
u64 access_flags);
|
||||
void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
|
||||
|
|
|
@ -1116,6 +1116,21 @@ static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
|
|||
mlx5_ib_free_xlt(xlt, sg->length);
|
||||
}
|
||||
|
||||
static unsigned int xlt_wr_final_send_flags(unsigned int flags)
|
||||
{
|
||||
unsigned int res = 0;
|
||||
|
||||
if (flags & MLX5_IB_UPD_XLT_ENABLE)
|
||||
res |= MLX5_IB_SEND_UMR_ENABLE_MR |
|
||||
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
|
||||
MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
|
||||
if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
|
||||
res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
|
||||
if (flags & MLX5_IB_UPD_XLT_ADDR)
|
||||
res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
|
||||
return res;
|
||||
}
|
||||
|
||||
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
|
||||
int page_shift, int flags)
|
||||
{
|
||||
|
@ -1140,6 +1155,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
|
|||
!umr_can_use_indirect_mkey(dev))
|
||||
return -EPERM;
|
||||
|
||||
if (WARN_ON(!mr->umem->is_odp))
|
||||
return -EINVAL;
|
||||
|
||||
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
|
||||
* so we need to align the offset and length accordingly
|
||||
*/
|
||||
|
@ -1155,13 +1173,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
|
|||
pages_iter = sg.length / desc_size;
|
||||
orig_sg_length = sg.length;
|
||||
|
||||
if (mr->umem->is_odp) {
|
||||
if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
|
||||
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
|
||||
size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
|
||||
if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
|
||||
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
|
||||
size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
|
||||
|
||||
pages_to_map = min_t(size_t, pages_to_map, max_pages);
|
||||
}
|
||||
pages_to_map = min_t(size_t, pages_to_map, max_pages);
|
||||
}
|
||||
|
||||
wr.page_shift = page_shift;
|
||||
|
@ -1173,36 +1189,14 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
|
|||
size_to_map = npages * desc_size;
|
||||
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
|
||||
DMA_TO_DEVICE);
|
||||
if (mr->umem->is_odp) {
|
||||
mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
|
||||
} else {
|
||||
__mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx,
|
||||
npages, xlt,
|
||||
MLX5_IB_MTT_PRESENT);
|
||||
/* Clear padding after the pages
|
||||
* brought from the umem.
|
||||
*/
|
||||
memset(xlt + size_to_map, 0, sg.length - size_to_map);
|
||||
}
|
||||
mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
|
||||
dma_sync_single_for_device(ddev, sg.addr, sg.length,
|
||||
DMA_TO_DEVICE);
|
||||
|
||||
sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
|
||||
|
||||
if (pages_mapped + pages_iter >= pages_to_map) {
|
||||
if (flags & MLX5_IB_UPD_XLT_ENABLE)
|
||||
wr.wr.send_flags |=
|
||||
MLX5_IB_SEND_UMR_ENABLE_MR |
|
||||
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
|
||||
MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
|
||||
if (flags & MLX5_IB_UPD_XLT_PD ||
|
||||
flags & MLX5_IB_UPD_XLT_ACCESS)
|
||||
wr.wr.send_flags |=
|
||||
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
|
||||
if (flags & MLX5_IB_UPD_XLT_ADDR)
|
||||
wr.wr.send_flags |=
|
||||
MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
|
||||
}
|
||||
if (pages_mapped + pages_iter >= pages_to_map)
|
||||
wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
|
||||
|
||||
wr.offset = idx * desc_size;
|
||||
wr.xlt_size = sg.length;
|
||||
|
@ -1214,6 +1208,69 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
|
|||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Send the DMA list to the HW for a normal MR using UMR.
|
||||
*/
|
||||
static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
|
||||
{
|
||||
struct mlx5_ib_dev *dev = mr->dev;
|
||||
struct device *ddev = dev->ib_dev.dev.parent;
|
||||
struct ib_block_iter biter;
|
||||
struct mlx5_mtt *cur_mtt;
|
||||
struct mlx5_umr_wr wr;
|
||||
size_t orig_sg_length;
|
||||
struct mlx5_mtt *mtt;
|
||||
size_t final_size;
|
||||
struct ib_sge sg;
|
||||
int err = 0;
|
||||
|
||||
if (WARN_ON(mr->umem->is_odp))
|
||||
return -EINVAL;
|
||||
|
||||
mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
|
||||
ib_umem_num_dma_blocks(mr->umem,
|
||||
1 << mr->page_shift),
|
||||
sizeof(*mtt), flags);
|
||||
if (!mtt)
|
||||
return -ENOMEM;
|
||||
orig_sg_length = sg.length;
|
||||
|
||||
cur_mtt = mtt;
|
||||
rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap,
|
||||
BIT(mr->page_shift)) {
|
||||
if (cur_mtt == (void *)mtt + sg.length) {
|
||||
dma_sync_single_for_device(ddev, sg.addr, sg.length,
|
||||
DMA_TO_DEVICE);
|
||||
err = mlx5_ib_post_send_wait(dev, &wr);
|
||||
if (err)
|
||||
goto err;
|
||||
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
|
||||
DMA_TO_DEVICE);
|
||||
wr.offset += sg.length;
|
||||
cur_mtt = mtt;
|
||||
}
|
||||
|
||||
cur_mtt->ptag =
|
||||
cpu_to_be64(rdma_block_iter_dma_address(&biter) |
|
||||
MLX5_IB_MTT_PRESENT);
|
||||
cur_mtt++;
|
||||
}
|
||||
|
||||
final_size = (void *)cur_mtt - (void *)mtt;
|
||||
sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
|
||||
memset(cur_mtt, 0, sg.length - final_size);
|
||||
wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
|
||||
wr.xlt_size = sg.length;
|
||||
|
||||
dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
|
||||
err = mlx5_ib_post_send_wait(dev, &wr);
|
||||
|
||||
err:
|
||||
sg.length = orig_sg_length;
|
||||
mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* If ibmr is NULL it will be allocated by reg_create.
|
||||
* Else, the given ibmr will be used.
|
||||
|
@ -1480,12 +1537,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
|
|||
* configured properly but left disabled. It is safe to go ahead
|
||||
* and configure it again via UMR while enabling it.
|
||||
*/
|
||||
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
|
||||
|
||||
err = mlx5_ib_update_xlt(
|
||||
mr, 0,
|
||||
ib_umem_num_dma_blocks(umem, 1UL << mr->page_shift),
|
||||
mr->page_shift, update_xlt_flags);
|
||||
err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
|
||||
if (err) {
|
||||
dereg_mr(dev, mr);
|
||||
return ERR_PTR(err);
|
||||
|
@ -1651,11 +1703,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
|
|||
upd_flags |= MLX5_IB_UPD_XLT_PD;
|
||||
if (flags & IB_MR_REREG_ACCESS)
|
||||
upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
|
||||
err = mlx5_ib_update_xlt(
|
||||
mr, 0,
|
||||
ib_umem_num_dma_blocks(mr->umem,
|
||||
1UL << mr->page_shift),
|
||||
mr->page_shift, upd_flags);
|
||||
err = mlx5_ib_update_mr_pas(mr, upd_flags);
|
||||
} else {
|
||||
err = rereg_umr(pd, mr, access_flags, flags);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue