OpenCloudOS-Kernel/drivers/fpga/dfl-afu-dma-region.c

419 lines
10 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Driver for FPGA Accelerated Function Unit (AFU) DMA Region Management
*
* Copyright (C) 2017-2018 Intel Corporation, Inc.
*
* Authors:
* Wu Hao <hao.wu@intel.com>
* Xiao Guangrong <guangrong.xiao@linux.intel.com>
*/
#include <linux/dma-mapping.h>
#include <linux/limits.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>

#include "dfl-afu.h"
/*
 * Call put_page() on each non-NULL entry among the first @npages slots of
 * @pages. NULL slots are skipped.
 */
static void put_all_pages(struct page **pages, int npages)
{
	int idx = 0;

	while (idx < npages) {
		struct page *page = pages[idx++];

		if (page)
			put_page(page);
	}
}
/* Reset the rbtree of dma regions kept in the AFU private data to empty. */
void afu_dma_region_init(struct dfl_feature_platform_data *pdata)
{
	struct dfl_afu *priv;

	priv = dfl_fpga_pdata_get_private(pdata);
	priv->dma_regions = RB_ROOT;
}
/**
* afu_dma_pin_pages - pin pages of given dma memory region
* @pdata: feature device platform data
* @region: dma memory region to be pinned
*
* Pin all the pages of given dfl_afu_dma_region.
* Return 0 for success or negative error code.
*/
static int afu_dma_pin_pages(struct dfl_feature_platform_data *pdata,
struct dfl_afu_dma_region *region)
{
int npages = region->length >> PAGE_SHIFT;
struct device *dev = &pdata->dev->dev;
int ret, pinned;
mm: add account_locked_vm utility function locked_vm accounting is done roughly the same way in five places, so unify them in a helper. Include the helper's caller in the debug print to distinguish between callsites. Error codes stay the same, so user-visible behavior does too. The one exception is that the -EPERM case in tce_account_locked_vm is removed because Alexey has never seen it triggered. [daniel.m.jordan@oracle.com: v3] Link: http://lkml.kernel.org/r/20190529205019.20927-1-daniel.m.jordan@oracle.com [sfr@canb.auug.org.au: fix mm/util.c] Link: http://lkml.kernel.org/r/20190524175045.26897-1-daniel.m.jordan@oracle.com Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru> Acked-by: Alex Williamson <alex.williamson@redhat.com> Cc: Alan Tull <atull@kernel.org> Cc: Alex Williamson <alex.williamson@redhat.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Christoph Lameter <cl@linux.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Jason Gunthorpe <jgg@mellanox.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Moritz Fischer <mdf@kernel.org> Cc: Paul Mackerras <paulus@ozlabs.org> Cc: Steve Sistare <steven.sistare@oracle.com> Cc: Wu Hao <hao.wu@intel.com> Cc: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-17 07:30:54 +08:00
ret = account_locked_vm(current->mm, npages, true);
if (ret)
return ret;
region->pages = kcalloc(npages, sizeof(struct page *), GFP_KERNEL);
if (!region->pages) {
ret = -ENOMEM;
goto unlock_vm;
}
mm/gup: change GUP fast to use flags rather than a write 'bool' To facilitate additional options to get_user_pages_fast() change the singular write parameter to be gup_flags. This patch does not change any functionality. New functionality will follow in subsequent patches. Some of the get_user_pages_fast() call sites were unchanged because they already passed FOLL_WRITE or 0 for the write parameter. NOTE: It was suggested to change the ordering of the get_user_pages_fast() arguments to ensure that callers were converted. This breaks the current GUP call site convention of having the returned pages be the final parameter. So the suggestion was rejected. Link: http://lkml.kernel.org/r/20190328084422.29911-4-ira.weiny@intel.com Link: http://lkml.kernel.org/r/20190317183438.2057-4-ira.weiny@intel.com Signed-off-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Mike Marshall <hubcap@omnibond.com> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Dan Williams <dan.j.williams@intel.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Hogan <jhogan@kernel.org> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: John Hubbard <jhubbard@nvidia.com> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Rich Felker <dalias@libc.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-05-14 08:17:11 +08:00
pinned = get_user_pages_fast(region->user_addr, npages, FOLL_WRITE,
region->pages);
if (pinned < 0) {
ret = pinned;
goto put_pages;
} else if (pinned != npages) {
ret = -EFAULT;
goto free_pages;
}
dev_dbg(dev, "%d pages pinned\n", pinned);
return 0;
put_pages:
put_all_pages(region->pages, pinned);
free_pages:
kfree(region->pages);
unlock_vm:
mm: add account_locked_vm utility function locked_vm accounting is done roughly the same way in five places, so unify them in a helper. Include the helper's caller in the debug print to distinguish between callsites. Error codes stay the same, so user-visible behavior does too. The one exception is that the -EPERM case in tce_account_locked_vm is removed because Alexey has never seen it triggered. [daniel.m.jordan@oracle.com: v3] Link: http://lkml.kernel.org/r/20190529205019.20927-1-daniel.m.jordan@oracle.com [sfr@canb.auug.org.au: fix mm/util.c] Link: http://lkml.kernel.org/r/20190524175045.26897-1-daniel.m.jordan@oracle.com Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru> Acked-by: Alex Williamson <alex.williamson@redhat.com> Cc: Alan Tull <atull@kernel.org> Cc: Alex Williamson <alex.williamson@redhat.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Christoph Lameter <cl@linux.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Jason Gunthorpe <jgg@mellanox.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Moritz Fischer <mdf@kernel.org> Cc: Paul Mackerras <paulus@ozlabs.org> Cc: Steve Sistare <steven.sistare@oracle.com> Cc: Wu Hao <hao.wu@intel.com> Cc: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-17 07:30:54 +08:00
account_locked_vm(current->mm, npages, false);
return ret;
}
/**
* afu_dma_unpin_pages - unpin pages of given dma memory region
* @pdata: feature device platform data
* @region: dma memory region to be unpinned
*
* Unpin all the pages of given dfl_afu_dma_region.
* Return 0 for success or negative error code.
*/
static void afu_dma_unpin_pages(struct dfl_feature_platform_data *pdata,
struct dfl_afu_dma_region *region)
{
long npages = region->length >> PAGE_SHIFT;
struct device *dev = &pdata->dev->dev;
put_all_pages(region->pages, npages);
kfree(region->pages);
mm: add account_locked_vm utility function locked_vm accounting is done roughly the same way in five places, so unify them in a helper. Include the helper's caller in the debug print to distinguish between callsites. Error codes stay the same, so user-visible behavior does too. The one exception is that the -EPERM case in tce_account_locked_vm is removed because Alexey has never seen it triggered. [daniel.m.jordan@oracle.com: v3] Link: http://lkml.kernel.org/r/20190529205019.20927-1-daniel.m.jordan@oracle.com [sfr@canb.auug.org.au: fix mm/util.c] Link: http://lkml.kernel.org/r/20190524175045.26897-1-daniel.m.jordan@oracle.com Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru> Acked-by: Alex Williamson <alex.williamson@redhat.com> Cc: Alan Tull <atull@kernel.org> Cc: Alex Williamson <alex.williamson@redhat.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Christoph Lameter <cl@linux.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Jason Gunthorpe <jgg@mellanox.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Moritz Fischer <mdf@kernel.org> Cc: Paul Mackerras <paulus@ozlabs.org> Cc: Steve Sistare <steven.sistare@oracle.com> Cc: Wu Hao <hao.wu@intel.com> Cc: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-17 07:30:54 +08:00
account_locked_vm(current->mm, npages, false);
dev_dbg(dev, "%ld pages unpinned\n", npages);
}
/**
 * afu_dma_check_continuous_pages - check if pages are continuous
 * @region: dma memory region
 *
 * Walk region->pages and compare the page frame number of each page with
 * the one before it.
 *
 * Return true if every page frame number is exactly one greater than its
 * predecessor's (also when there are fewer than two pages), otherwise false.
 */
static bool afu_dma_check_continuous_pages(struct dfl_afu_dma_region *region)
{
	int count = region->length >> PAGE_SHIFT;
	int idx;

	for (idx = 1; idx < count; idx++) {
		unsigned long prev_pfn = page_to_pfn(region->pages[idx - 1]);
		unsigned long this_pfn = page_to_pfn(region->pages[idx]);

		if (prev_pfn + 1 != this_pfn)
			return false;
	}

	return true;
}
/**
 * dma_region_check_iova - check if memory area is fully contained in the region
 * @region: dma memory region
 * @iova: address of the dma memory area
 * @size: size of the dma memory area
 *
 * Compare the dma memory area defined by @iova and @size with given dma region.
 * A zero @size only matches when @iova is exactly the start of the region.
 *
 * Return true if memory area is fully contained in the region, otherwise false.
 */
static bool dma_region_check_iova(struct dfl_afu_dma_region *region,
				  u64 iova, u64 size)
{
	u64 offset;

	if (!size)
		return region->iova == iova;

	if (iova < region->iova)
		return false;

	/*
	 * Compare offset and remaining length instead of end addresses, so
	 * that an @iova + @size which wraps around u64 cannot be mistaken
	 * for an area lying inside the region.
	 */
	offset = iova - region->iova;

	return offset <= region->length && size <= region->length - offset;
}
/**
 * afu_dma_region_add - add given dma region to rbtree
 * @pdata: feature device platform data
 * @region: dma region to be added
 *
 * The tree is keyed by the start iova of each region. Insertion is refused
 * when a visited node already fully contains the new region or starts at
 * the same iova.
 *
 * Return 0 for success, -EEXIST if dma region has already been added.
 *
 * Needs to be called with pdata->lock held.
 */
static int afu_dma_region_add(struct dfl_feature_platform_data *pdata,
			      struct dfl_afu_dma_region *region)
{
	struct dfl_afu *afu = dfl_fpga_pdata_get_private(pdata);
	struct rb_node **link = &afu->dma_regions.rb_node;
	struct rb_node *parent = NULL;

	dev_dbg(&pdata->dev->dev, "add region (iova = %llx)\n",
		(unsigned long long)region->iova);

	while (*link) {
		struct dfl_afu_dma_region *cur;

		parent = *link;
		cur = container_of(parent, struct dfl_afu_dma_region, node);

		if (dma_region_check_iova(cur, region->iova, region->length))
			return -EEXIST;

		if (region->iova == cur->iova)
			return -EEXIST;

		/* Descend towards the slot where the new start iova belongs. */
		if (region->iova < cur->iova)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	rb_link_node(&region->node, parent, link);
	rb_insert_color(&region->node, &afu->dma_regions);

	return 0;
}
/**
 * afu_dma_region_remove - remove given dma region from rbtree
 * @pdata: feature device platform data
 * @region: dma region to be removed
 *
 * Only unlinks @region from the tree; the region itself is not freed here.
 *
 * Needs to be called with pdata->lock held.
 */
static void afu_dma_region_remove(struct dfl_feature_platform_data *pdata,
				  struct dfl_afu_dma_region *region)
{
	struct dfl_afu *priv;
	struct rb_root *tree;

	dev_dbg(&pdata->dev->dev, "del region (iova = %llx)\n",
		(unsigned long long)region->iova);

	priv = dfl_fpga_pdata_get_private(pdata);
	tree = &priv->dma_regions;
	rb_erase(&region->node, tree);
}
/**
 * afu_dma_region_destroy - destroy all regions in rbtree
 * @pdata: feature device platform data
 *
 * Walk the tree in order and, for every region: unlink it, unmap it for dma
 * if it has a non-zero iova, unpin its pages if it has a page array, and
 * free the region.
 *
 * Needs to be called with pdata->lock held.
 */
void afu_dma_region_destroy(struct dfl_feature_platform_data *pdata)
{
	struct dfl_afu *afu = dfl_fpga_pdata_get_private(pdata);
	struct rb_node *node = rb_first(&afu->dma_regions);
	struct dfl_afu_dma_region *region;
	struct rb_node *next;

	while (node) {
		region = container_of(node, struct dfl_afu_dma_region, node);

		dev_dbg(&pdata->dev->dev, "del region (iova = %llx)\n",
			(unsigned long long)region->iova);

		/*
		 * Fetch the successor while @node is still linked into the
		 * tree; walking from a node after rb_erase() would depend on
		 * its stale link pointers.
		 */
		next = rb_next(node);
		rb_erase(node, &afu->dma_regions);

		if (region->iova)
			dma_unmap_page(dfl_fpga_pdata_to_parent(pdata),
				       region->iova, region->length,
				       DMA_BIDIRECTIONAL);

		if (region->pages)
			afu_dma_unpin_pages(pdata, region);

		kfree(region);
		node = next;
	}
}
/**
 * afu_dma_region_find - find the dma region from rbtree based on iova and size
 * @pdata: feature device platform data
 * @iova: address of the dma memory area
 * @size: size of the dma memory area
 *
 * Descend the rbtree by start iova and test each visited region against
 * @iova and @size:
 *   - if @size == 0, a region matches when it starts from @iova
 *   - otherwise, a region matches when it fully contains
 *     [@iova, @iova+size)
 * The first matching region is returned. If nothing is matched returns NULL.
 *
 * Needs to be called with pdata->lock held.
 */
struct dfl_afu_dma_region *
afu_dma_region_find(struct dfl_feature_platform_data *pdata, u64 iova, u64 size)
{
	struct dfl_afu *afu = dfl_fpga_pdata_get_private(pdata);
	struct device *dev = &pdata->dev->dev;
	struct rb_node *cursor;

	for (cursor = afu->dma_regions.rb_node; cursor; ) {
		struct dfl_afu_dma_region *candidate =
			container_of(cursor, struct dfl_afu_dma_region, node);

		if (dma_region_check_iova(candidate, iova, size)) {
			dev_dbg(dev, "find region (iova = %llx)\n",
				(unsigned long long)candidate->iova);
			return candidate;
		}

		/* Same start address, but the area is not fully covered. */
		if (iova == candidate->iova)
			break;

		if (iova < candidate->iova)
			cursor = cursor->rb_left;
		else
			cursor = cursor->rb_right;
	}

	dev_dbg(dev, "region with iova %llx and size %llx is not found\n",
		(unsigned long long)iova, (unsigned long long)size);

	return NULL;
}
/**
 * afu_dma_region_find_iova - find the dma region from rbtree by iova
 * @pdata: feature device platform data
 * @iova: address of the dma region
 *
 * Thin wrapper around afu_dma_region_find() that passes a size of 0.
 * Returns whatever afu_dma_region_find() returns.
 *
 * Needs to be called with pdata->lock held.
 */
static struct dfl_afu_dma_region *
afu_dma_region_find_iova(struct dfl_feature_platform_data *pdata, u64 iova)
{
	const u64 no_size = 0;

	return afu_dma_region_find(pdata, iova, no_size);
}
/**
 * afu_dma_map_region - map memory region for dma
 * @pdata: feature device platform data
 * @user_addr: address of the memory region
 * @length: size of the memory region
 * @iova: pointer of iova address
 *
 * Map memory region defined by @user_addr and @length, and return dma address
 * of the memory region via @iova. The region must be page aligned, non-empty,
 * user accessible and backed by physically continuous pages. On success the
 * region is inserted into the rbtree of dma regions under pdata->lock.
 *
 * Return 0 for success, otherwise error code.
 */
int afu_dma_map_region(struct dfl_feature_platform_data *pdata,
		       u64 user_addr, u64 length, u64 *iova)
{
	struct dfl_afu_dma_region *region;
	int ret;

	/*
	 * Check Inputs, only accept page-aligned user memory region with
	 * valid length.
	 */
	if (!PAGE_ALIGNED(user_addr) || !PAGE_ALIGNED(length) || !length)
		return -EINVAL;

	/* Check overflow */
	if (user_addr + length < user_addr)
		return -EINVAL;

	if (!access_ok((void __user *)(unsigned long)user_addr,
		       length))
		return -EINVAL;

	region = kzalloc(sizeof(*region), GFP_KERNEL);
	if (!region)
		return -ENOMEM;

	region->user_addr = user_addr;
	region->length = length;

	/* Pin the user memory region */
	ret = afu_dma_pin_pages(pdata, region);
	if (ret) {
		dev_err(&pdata->dev->dev, "failed to pin memory region\n");
		goto free_region;
	}

	/* Only accept continuous pages, return error else */
	if (!afu_dma_check_continuous_pages(region)) {
		dev_err(&pdata->dev->dev, "pages are not continuous\n");
		ret = -EINVAL;
		goto unpin_pages;
	}

	/* As pages are continuous then start to do DMA mapping */
	region->iova = dma_map_page(dfl_fpga_pdata_to_parent(pdata),
				    region->pages[0], 0,
				    region->length,
				    DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dfl_fpga_pdata_to_parent(pdata), region->iova)) {
		dev_err(&pdata->dev->dev, "failed to map for dma\n");
		ret = -EFAULT;
		goto unpin_pages;
	}

	*iova = region->iova;

	/* The rbtree of dma regions is only modified under pdata->lock. */
	mutex_lock(&pdata->lock);
	ret = afu_dma_region_add(pdata, region);
	mutex_unlock(&pdata->lock);
	if (ret) {
		dev_err(&pdata->dev->dev, "failed to add dma region\n");
		goto unmap_dma;
	}

	return 0;

unmap_dma:
	dma_unmap_page(dfl_fpga_pdata_to_parent(pdata),
		       region->iova, region->length, DMA_BIDIRECTIONAL);
unpin_pages:
	afu_dma_unpin_pages(pdata, region);
free_region:
	kfree(region);
	return ret;
}
/**
 * afu_dma_unmap_region - unmap dma memory region
 * @pdata: feature device platform data
 * @iova: dma address of the region
 *
 * Look up the region starting at @iova and take it out of the rbtree under
 * pdata->lock, then unmap it for dma, unpin its pages and free it.
 *
 * Return 0 for success, -EINVAL if no region starts at @iova, -EBUSY if the
 * region is marked in_use.
 */
int afu_dma_unmap_region(struct dfl_feature_platform_data *pdata, u64 iova)
{
	struct dfl_afu_dma_region *region;
	int ret = 0;

	mutex_lock(&pdata->lock);
	region = afu_dma_region_find_iova(pdata, iova);
	if (!region)
		ret = -EINVAL;
	else if (region->in_use)
		ret = -EBUSY;
	else
		afu_dma_region_remove(pdata, region);
	mutex_unlock(&pdata->lock);

	if (ret)
		return ret;

	/* The region is no longer in the tree; tear it down outside the lock. */
	dma_unmap_page(dfl_fpga_pdata_to_parent(pdata),
		       region->iova, region->length, DMA_BIDIRECTIONAL);
	afu_dma_unpin_pages(pdata, region);
	kfree(region);

	return 0;
}