misc: mic: SCIF remote memory map/unmap interface
This patch implements the SCIF mmap/munmap interface. A similar capability is provided to kernel clients via the scif_get_pages()/scif_put_pages() APIs. The SCIF mmap interface queries to check if a window is valid and then remaps the local virtual address to the remote physical pages. These mappings are subsequently destroyed upon receipt of the VMA close operation or scif_put_pages(). This functionality allows SCIF users to directly access remote memory without any driver interaction once the mappings are created, thereby providing bare-metal PCIe latency. These mappings are zapped to avoid RMA accesses from user space if a Coprocessor is reset. Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Reviewed-by: Nikhil Rao <nikhil.rao@intel.com> Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
168ef015ca
commit
f1a2d865e7
|
@ -0,0 +1,699 @@
|
|||
/*
|
||||
* Intel MIC Platform Software Stack (MPSS)
|
||||
*
|
||||
* Copyright(c) 2015 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License, version 2, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Intel SCIF driver.
|
||||
*
|
||||
*/
|
||||
#include "scif_main.h"
|
||||
|
||||
/*
|
||||
* struct scif_vma_info - Information about a remote memory mapping
|
||||
* created via scif_mmap(..)
|
||||
* @vma: VM area struct
|
||||
* @list: link to list of active vmas
|
||||
*/
|
||||
struct scif_vma_info {
|
||||
struct vm_area_struct *vma;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
|
||||
{
|
||||
struct scif_rma_req req;
|
||||
struct scif_window *window = NULL;
|
||||
struct scif_window *recv_window =
|
||||
(struct scif_window *)msg->payload[0];
|
||||
struct scif_endpt *ep;
|
||||
|
||||
ep = (struct scif_endpt *)recv_window->ep;
|
||||
req.out_window = &window;
|
||||
req.offset = recv_window->offset;
|
||||
req.prot = recv_window->prot;
|
||||
req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
|
||||
req.type = SCIF_WINDOW_FULL;
|
||||
req.head = &ep->rma_info.reg_list;
|
||||
msg->payload[0] = ep->remote_ep;
|
||||
|
||||
mutex_lock(&ep->rma_info.rma_lock);
|
||||
/* Does a valid window exist? */
|
||||
if (scif_query_window(&req)) {
|
||||
dev_err(&scifdev->sdev->dev,
|
||||
"%s %d -ENXIO\n", __func__, __LINE__);
|
||||
msg->uop = SCIF_UNREGISTER_ACK;
|
||||
goto error;
|
||||
}
|
||||
|
||||
scif_put_window(window, window->nr_pages);
|
||||
|
||||
if (!window->ref_count) {
|
||||
atomic_inc(&ep->rma_info.tw_refcount);
|
||||
ep->rma_info.async_list_del = 1;
|
||||
list_del_init(&window->list);
|
||||
scif_free_window_offset(ep, window, window->offset);
|
||||
}
|
||||
error:
|
||||
mutex_unlock(&ep->rma_info.rma_lock);
|
||||
if (window && !window->ref_count)
|
||||
scif_queue_for_cleanup(window, &scif_info.rma);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove valid remote memory mappings created via scif_mmap(..) from the
|
||||
* process address space since the remote node is lost
|
||||
*/
|
||||
static void __scif_zap_mmaps(struct scif_endpt *ep)
|
||||
{
|
||||
struct list_head *item;
|
||||
struct scif_vma_info *info;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long size;
|
||||
|
||||
spin_lock(&ep->lock);
|
||||
list_for_each(item, &ep->rma_info.vma_list) {
|
||||
info = list_entry(item, struct scif_vma_info, list);
|
||||
vma = info->vma;
|
||||
size = vma->vm_end - vma->vm_start;
|
||||
zap_vma_ptes(vma, vma->vm_start, size);
|
||||
dev_dbg(scif_info.mdev.this_device,
|
||||
"%s ep %p zap vma %p size 0x%lx\n",
|
||||
__func__, ep, info->vma, size);
|
||||
}
|
||||
spin_unlock(&ep->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Traverse the list of endpoints for a particular remote node and
|
||||
* zap valid remote memory mappings since the remote node is lost
|
||||
*/
|
||||
static void _scif_zap_mmaps(int node, struct list_head *head)
|
||||
{
|
||||
struct scif_endpt *ep;
|
||||
struct list_head *item;
|
||||
|
||||
mutex_lock(&scif_info.connlock);
|
||||
list_for_each(item, head) {
|
||||
ep = list_entry(item, struct scif_endpt, list);
|
||||
if (ep->remote_dev->node == node)
|
||||
__scif_zap_mmaps(ep);
|
||||
}
|
||||
mutex_unlock(&scif_info.connlock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wrapper for removing remote memory mappings for a particular node. This API
|
||||
* is called by peer nodes as part of handling a lost node.
|
||||
*/
|
||||
void scif_zap_mmaps(int node)
|
||||
{
|
||||
_scif_zap_mmaps(node, &scif_info.connected);
|
||||
_scif_zap_mmaps(node, &scif_info.disconnected);
|
||||
}
|
||||
|
||||
/*
|
||||
* This API is only called while handling a lost node:
|
||||
* a) Remote node is dead.
|
||||
* b) Remote memory mappings have been zapped
|
||||
* So we can traverse the remote_reg_list without any locks. Since
|
||||
* the window has not yet been unregistered we can drop the ref count
|
||||
* and queue it to the cleanup thread.
|
||||
*/
|
||||
static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
|
||||
{
|
||||
struct list_head *pos, *tmp;
|
||||
struct scif_window *window;
|
||||
|
||||
list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
|
||||
window = list_entry(pos, struct scif_window, list);
|
||||
if (window->ref_count)
|
||||
scif_put_window(window, window->nr_pages);
|
||||
else
|
||||
dev_err(scif_info.mdev.this_device,
|
||||
"%s %d unexpected\n",
|
||||
__func__, __LINE__);
|
||||
if (!window->ref_count) {
|
||||
atomic_inc(&ep->rma_info.tw_refcount);
|
||||
list_del_init(&window->list);
|
||||
scif_queue_for_cleanup(window, &scif_info.rma);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Cleanup remote registration lists for zombie endpoints */
|
||||
void scif_cleanup_rma_for_zombies(int node)
|
||||
{
|
||||
struct scif_endpt *ep;
|
||||
struct list_head *item;
|
||||
|
||||
mutex_lock(&scif_info.eplock);
|
||||
list_for_each(item, &scif_info.zombie) {
|
||||
ep = list_entry(item, struct scif_endpt, list);
|
||||
if (ep->remote_dev && ep->remote_dev->node == node)
|
||||
__scif_cleanup_rma_for_zombies(ep);
|
||||
}
|
||||
mutex_unlock(&scif_info.eplock);
|
||||
flush_work(&scif_info.misc_work);
|
||||
}
|
||||
|
||||
/* Insert the VMA into the per endpoint VMA list */
|
||||
static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
|
||||
{
|
||||
struct scif_vma_info *info;
|
||||
int err = 0;
|
||||
|
||||
info = kzalloc(sizeof(*info), GFP_KERNEL);
|
||||
if (!info) {
|
||||
err = -ENOMEM;
|
||||
goto done;
|
||||
}
|
||||
info->vma = vma;
|
||||
spin_lock(&ep->lock);
|
||||
list_add_tail(&info->list, &ep->rma_info.vma_list);
|
||||
spin_unlock(&ep->lock);
|
||||
done:
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Delete the VMA from the per endpoint VMA list */
|
||||
static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
|
||||
{
|
||||
struct list_head *item;
|
||||
struct scif_vma_info *info;
|
||||
|
||||
spin_lock(&ep->lock);
|
||||
list_for_each(item, &ep->rma_info.vma_list) {
|
||||
info = list_entry(item, struct scif_vma_info, list);
|
||||
if (info->vma == vma) {
|
||||
list_del(&info->list);
|
||||
kfree(info);
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&ep->lock);
|
||||
}
|
||||
|
||||
/*
 * Translate @phys into the address that has to be mapped locally.
 *
 * If the DMA address is card relative (remote device is not this node,
 * an aperture is present and card_rel_da is set) the aperture base is
 * added so that mmap works correctly; otherwise @phys is returned as is.
 */
static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep)
{
	struct scif_dev *rdev = (struct scif_dev *)ep->remote_dev;
	struct scif_hw_dev *hw = rdev->sdev;
	phys_addr_t base;

	if (scifdev_self(rdev) || !hw->aper || !hw->card_rel_da)
		return phys;

	base = hw->aper->pa;
	return base + phys;
}
|
||||
|
||||
/*
 * scif_get_pages - describe the pages backing a range of a remote window
 * @epd: endpoint descriptor
 * @offset: non-negative, page aligned offset of the range
 * @len: non-zero, page aligned length of the range in bytes
 * @pages: on success *pages points to a newly allocated struct scif_range
 *         holding one physical address per page (and, on the management
 *         node for a non-loopback peer, one aperture virtual address per
 *         page)
 *
 * The range must lie within a single window on the endpoint's
 * remote_reg_list. On success the window's reference count is raised by
 * the number of pages and the window is stored as the range's cookie.
 *
 * Once the arguments have been validated *pages is reset to NULL, so it
 * is NULL on every later failure and the error path never inspects a
 * value supplied by the caller.
 *
 * Return: 0 on success, the error from scif_verify_epd() or
 * scif_query_window(), -EINVAL for a bad offset/len, or -ENOMEM.
 */
int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
		   struct scif_range **pages)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	struct scif_range *range = NULL;
	bool need_va;
	int nr_pages, err, i;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n",
		ep, offset, len);
	err = scif_verify_epd(ep);
	if (err)
		return err;

	if (!len || (offset < 0) ||
	    (offset + len < offset) ||
	    (ALIGN(offset, PAGE_SIZE) != offset) ||
	    (ALIGN(len, PAGE_SIZE) != len))
		return -EINVAL;

	nr_pages = len >> PAGE_SHIFT;

	/* Never leave a stale or uninitialised pointer with the caller */
	*pages = NULL;

	req.out_window = &window;
	req.offset = offset;
	req.prot = 0;
	req.nr_bytes = len;
	req.type = SCIF_WINDOW_SINGLE;
	req.head = &ep->rma_info.remote_reg_list;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	err = scif_query_window(&req);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error;
	}

	/* Allocate scif_range */
	range = kzalloc(sizeof(*range), GFP_KERNEL);
	if (!range) {
		err = -ENOMEM;
		goto error;
	}

	/* Allocate phys addr array */
	range->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t));
	if (!range->phys_addr) {
		err = -ENOMEM;
		goto error;
	}

	/* Virtual addresses are only filled in on the mgmt node for a peer */
	need_va = scif_is_mgmt_node() && !scifdev_self(ep->remote_dev);
	if (need_va) {
		/* Allocate virtual address array */
		range->va = scif_zalloc(nr_pages * sizeof(void *));
		if (!range->va) {
			err = -ENOMEM;
			goto error;
		}
	}

	/* Populate the values */
	range->cookie = window;
	range->nr_pages = nr_pages;
	range->prot_flags = window->prot;

	for (i = 0; i < nr_pages; i++) {
		range->phys_addr[i] =
			__scif_off_to_dma_addr(window, offset +
					       (i * PAGE_SIZE));
		range->phys_addr[i] = scif_get_phys(range->phys_addr[i], ep);
		if (need_va)
			range->va[i] =
				ep->remote_dev->sdev->aper->va +
				range->phys_addr[i] -
				ep->remote_dev->sdev->aper->pa;
	}

	scif_get_window(window, nr_pages);
	/* Publish the result only once nothing can fail any more */
	*pages = range;
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (err) {
		if (range) {
			scif_free(range->phys_addr,
				  nr_pages * sizeof(dma_addr_t));
			scif_free(range->va,
				  nr_pages * sizeof(void *));
			kfree(range);
		}
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
	}
	return err;
}
EXPORT_SYMBOL_GPL(scif_get_pages);
|
||||
|
||||
int scif_put_pages(struct scif_range *pages)
|
||||
{
|
||||
struct scif_endpt *ep;
|
||||
struct scif_window *window;
|
||||
struct scifmsg msg;
|
||||
|
||||
if (!pages || !pages->cookie)
|
||||
return -EINVAL;
|
||||
|
||||
window = pages->cookie;
|
||||
|
||||
if (!window || window->magic != SCIFEP_MAGIC)
|
||||
return -EINVAL;
|
||||
|
||||
ep = (struct scif_endpt *)window->ep;
|
||||
/*
|
||||
* If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
|
||||
* callee should be allowed to release references to the pages,
|
||||
* else the endpoint was not connected in the first place,
|
||||
* hence the ENOTCONN.
|
||||
*/
|
||||
if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
|
||||
return -ENOTCONN;
|
||||
|
||||
mutex_lock(&ep->rma_info.rma_lock);
|
||||
|
||||
scif_put_window(window, pages->nr_pages);
|
||||
|
||||
/* Initiate window destruction if ref count is zero */
|
||||
if (!window->ref_count) {
|
||||
list_del(&window->list);
|
||||
mutex_unlock(&ep->rma_info.rma_lock);
|
||||
scif_drain_dma_intr(ep->remote_dev->sdev,
|
||||
ep->rma_info.dma_chan);
|
||||
/* Inform the peer about this window being destroyed. */
|
||||
msg.uop = SCIF_MUNMAP;
|
||||
msg.src = ep->port;
|
||||
msg.payload[0] = window->peer_window;
|
||||
/* No error handling for notification messages */
|
||||
scif_nodeqp_send(ep->remote_dev, &msg);
|
||||
/* Destroy this window from the peer's registered AS */
|
||||
scif_destroy_remote_window(window);
|
||||
} else {
|
||||
mutex_unlock(&ep->rma_info.rma_lock);
|
||||
}
|
||||
|
||||
scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
|
||||
scif_free(pages->va, pages->nr_pages * sizeof(void *));
|
||||
kfree(pages);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(scif_put_pages);
|
||||
|
||||
/*
|
||||
* scif_rma_list_mmap:
|
||||
*
|
||||
* Traverse the remote registration list starting from start_window:
|
||||
* 1) Create VtoP mappings via remap_pfn_range(..)
|
||||
* 2) Once step 1) and 2) complete successfully then traverse the range of
|
||||
* windows again and bump the reference count.
|
||||
* RMA lock must be held.
|
||||
*/
|
||||
static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
|
||||
int nr_pages, struct vm_area_struct *vma)
|
||||
{
|
||||
s64 end_offset, loop_offset = offset;
|
||||
struct scif_window *window = start_window;
|
||||
int loop_nr_pages, nr_pages_left = nr_pages;
|
||||
struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
|
||||
struct list_head *head = &ep->rma_info.remote_reg_list;
|
||||
int i, err = 0;
|
||||
dma_addr_t phys_addr;
|
||||
struct scif_window_iter src_win_iter;
|
||||
size_t contig_bytes = 0;
|
||||
|
||||
might_sleep();
|
||||
list_for_each_entry_from(window, head, list) {
|
||||
end_offset = window->offset +
|
||||
(window->nr_pages << PAGE_SHIFT);
|
||||
loop_nr_pages = min_t(int,
|
||||
(end_offset - loop_offset) >> PAGE_SHIFT,
|
||||
nr_pages_left);
|
||||
scif_init_window_iter(window, &src_win_iter);
|
||||
for (i = 0; i < loop_nr_pages; i++) {
|
||||
phys_addr = scif_off_to_dma_addr(window, loop_offset,
|
||||
&contig_bytes,
|
||||
&src_win_iter);
|
||||
phys_addr = scif_get_phys(phys_addr, ep);
|
||||
err = remap_pfn_range(vma,
|
||||
vma->vm_start +
|
||||
loop_offset - offset,
|
||||
phys_addr >> PAGE_SHIFT,
|
||||
PAGE_SIZE,
|
||||
vma->vm_page_prot);
|
||||
if (err)
|
||||
goto error;
|
||||
loop_offset += PAGE_SIZE;
|
||||
}
|
||||
nr_pages_left -= loop_nr_pages;
|
||||
if (!nr_pages_left)
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* No more failures expected. Bump up the ref count for all
|
||||
* the windows. Another traversal from start_window required
|
||||
* for handling errors encountered across windows during
|
||||
* remap_pfn_range(..).
|
||||
*/
|
||||
loop_offset = offset;
|
||||
nr_pages_left = nr_pages;
|
||||
window = start_window;
|
||||
head = &ep->rma_info.remote_reg_list;
|
||||
list_for_each_entry_from(window, head, list) {
|
||||
end_offset = window->offset +
|
||||
(window->nr_pages << PAGE_SHIFT);
|
||||
loop_nr_pages = min_t(int,
|
||||
(end_offset - loop_offset) >> PAGE_SHIFT,
|
||||
nr_pages_left);
|
||||
scif_get_window(window, loop_nr_pages);
|
||||
nr_pages_left -= loop_nr_pages;
|
||||
loop_offset += (loop_nr_pages << PAGE_SHIFT);
|
||||
if (!nr_pages_left)
|
||||
break;
|
||||
}
|
||||
error:
|
||||
if (err)
|
||||
dev_err(scif_info.mdev.this_device,
|
||||
"%s %d err %d\n", __func__, __LINE__, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* scif_rma_list_munmap:
|
||||
*
|
||||
* Traverse the remote registration list starting from window:
|
||||
* 1) Decrement ref count.
|
||||
* 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
|
||||
* RMA lock must be held.
|
||||
*/
|
||||
static void scif_rma_list_munmap(struct scif_window *start_window,
|
||||
s64 offset, int nr_pages)
|
||||
{
|
||||
struct scifmsg msg;
|
||||
s64 loop_offset = offset, end_offset;
|
||||
int loop_nr_pages, nr_pages_left = nr_pages;
|
||||
struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
|
||||
struct list_head *head = &ep->rma_info.remote_reg_list;
|
||||
struct scif_window *window = start_window, *_window;
|
||||
|
||||
msg.uop = SCIF_MUNMAP;
|
||||
msg.src = ep->port;
|
||||
loop_offset = offset;
|
||||
nr_pages_left = nr_pages;
|
||||
list_for_each_entry_safe_from(window, _window, head, list) {
|
||||
end_offset = window->offset +
|
||||
(window->nr_pages << PAGE_SHIFT);
|
||||
loop_nr_pages = min_t(int,
|
||||
(end_offset - loop_offset) >> PAGE_SHIFT,
|
||||
nr_pages_left);
|
||||
scif_put_window(window, loop_nr_pages);
|
||||
if (!window->ref_count) {
|
||||
struct scif_dev *rdev = ep->remote_dev;
|
||||
|
||||
scif_drain_dma_intr(rdev->sdev,
|
||||
ep->rma_info.dma_chan);
|
||||
/* Inform the peer about this munmap */
|
||||
msg.payload[0] = window->peer_window;
|
||||
/* No error handling for Notification messages. */
|
||||
scif_nodeqp_send(ep->remote_dev, &msg);
|
||||
list_del(&window->list);
|
||||
/* Destroy this window from the peer's registered AS */
|
||||
scif_destroy_remote_window(window);
|
||||
}
|
||||
nr_pages_left -= loop_nr_pages;
|
||||
loop_offset += (loop_nr_pages << PAGE_SHIFT);
|
||||
if (!nr_pages_left)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The private data field of each VMA used to mmap a remote window
|
||||
* points to an instance of struct vma_pvt
|
||||
*/
|
||||
struct vma_pvt {
|
||||
struct scif_endpt *ep; /* End point for remote window */
|
||||
s64 offset; /* offset within remote window */
|
||||
bool valid_offset; /* offset is valid only if the original
|
||||
* mmap request was for a single page
|
||||
* else the offset within the vma is
|
||||
* the correct offset
|
||||
*/
|
||||
struct kref ref;
|
||||
};
|
||||
|
||||
static void vma_pvt_release(struct kref *ref)
|
||||
{
|
||||
struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref);
|
||||
|
||||
kfree(vmapvt);
|
||||
}
|
||||
|
||||
/**
|
||||
* scif_vma_open - VMA open driver callback
|
||||
* @vma: VMM memory area.
|
||||
* The open method is called by the kernel to allow the subsystem implementing
|
||||
* the VMA to initialize the area. This method is invoked any time a new
|
||||
* reference to the VMA is made (when a process forks, for example).
|
||||
* The one exception happens when the VMA is first created by mmap;
|
||||
* in this case, the driver's mmap method is called instead.
|
||||
* This function is also invoked when an existing VMA is split by the kernel
|
||||
* due to a call to munmap on a subset of the VMA resulting in two VMAs.
|
||||
* The kernel invokes this function only on one of the two VMAs.
|
||||
*/
|
||||
static void scif_vma_open(struct vm_area_struct *vma)
|
||||
{
|
||||
struct vma_pvt *vmapvt = vma->vm_private_data;
|
||||
|
||||
dev_dbg(scif_info.mdev.this_device,
|
||||
"SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
|
||||
vma->vm_start, vma->vm_end);
|
||||
scif_insert_vma(vmapvt->ep, vma);
|
||||
kref_get(&vmapvt->ref);
|
||||
}
|
||||
|
||||
/**
|
||||
* scif_munmap - VMA close driver callback.
|
||||
* @vma: VMM memory area.
|
||||
* When an area is destroyed, the kernel calls its close operation.
|
||||
* Note that there's no usage count associated with VMA's; the area
|
||||
* is opened and closed exactly once by each process that uses it.
|
||||
*/
|
||||
static void scif_munmap(struct vm_area_struct *vma)
|
||||
{
|
||||
struct scif_endpt *ep;
|
||||
struct vma_pvt *vmapvt = vma->vm_private_data;
|
||||
int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
|
||||
s64 offset;
|
||||
struct scif_rma_req req;
|
||||
struct scif_window *window = NULL;
|
||||
int err;
|
||||
|
||||
might_sleep();
|
||||
dev_dbg(scif_info.mdev.this_device,
|
||||
"SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
|
||||
vma->vm_start, vma->vm_end);
|
||||
ep = vmapvt->ep;
|
||||
offset = vmapvt->valid_offset ? vmapvt->offset :
|
||||
(vma->vm_pgoff) << PAGE_SHIFT;
|
||||
dev_dbg(scif_info.mdev.this_device,
|
||||
"SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
|
||||
ep, nr_pages, offset);
|
||||
req.out_window = &window;
|
||||
req.offset = offset;
|
||||
req.nr_bytes = vma->vm_end - vma->vm_start;
|
||||
req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
|
||||
req.type = SCIF_WINDOW_PARTIAL;
|
||||
req.head = &ep->rma_info.remote_reg_list;
|
||||
|
||||
mutex_lock(&ep->rma_info.rma_lock);
|
||||
|
||||
err = scif_query_window(&req);
|
||||
if (err)
|
||||
dev_err(scif_info.mdev.this_device,
|
||||
"%s %d err %d\n", __func__, __LINE__, err);
|
||||
else
|
||||
scif_rma_list_munmap(window, offset, nr_pages);
|
||||
|
||||
mutex_unlock(&ep->rma_info.rma_lock);
|
||||
/*
|
||||
* The kernel probably zeroes these out but we still want
|
||||
* to clean up our own mess just in case.
|
||||
*/
|
||||
vma->vm_ops = NULL;
|
||||
vma->vm_private_data = NULL;
|
||||
kref_put(&vmapvt->ref, vma_pvt_release);
|
||||
scif_delete_vma(ep, vma);
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct scif_vm_ops = {
|
||||
.open = scif_vma_open,
|
||||
.close = scif_munmap,
|
||||
};
|
||||
|
||||
/**
|
||||
* scif_mmap - Map pages in virtual address space to a remote window.
|
||||
* @vma: VMM memory area.
|
||||
* @epd: endpoint descriptor
|
||||
*
|
||||
* Return: Upon successful completion, scif_mmap() returns zero
|
||||
* else an apt error is returned as documented in scif.h
|
||||
*/
|
||||
int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
|
||||
{
|
||||
struct scif_rma_req req;
|
||||
struct scif_window *window = NULL;
|
||||
struct scif_endpt *ep = (struct scif_endpt *)epd;
|
||||
s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
|
||||
int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
|
||||
int err;
|
||||
struct vma_pvt *vmapvt;
|
||||
|
||||
dev_dbg(scif_info.mdev.this_device,
|
||||
"SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
|
||||
ep, start_offset, nr_pages);
|
||||
err = scif_verify_epd(ep);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
might_sleep();
|
||||
|
||||
err = scif_insert_vma(ep, vma);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
|
||||
if (!vmapvt) {
|
||||
scif_delete_vma(ep, vma);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
vmapvt->ep = ep;
|
||||
kref_init(&vmapvt->ref);
|
||||
|
||||
req.out_window = &window;
|
||||
req.offset = start_offset;
|
||||
req.nr_bytes = vma->vm_end - vma->vm_start;
|
||||
req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
|
||||
req.type = SCIF_WINDOW_PARTIAL;
|
||||
req.head = &ep->rma_info.remote_reg_list;
|
||||
|
||||
mutex_lock(&ep->rma_info.rma_lock);
|
||||
/* Does a valid window exist? */
|
||||
err = scif_query_window(&req);
|
||||
if (err) {
|
||||
dev_err(&ep->remote_dev->sdev->dev,
|
||||
"%s %d err %d\n", __func__, __LINE__, err);
|
||||
goto error_unlock;
|
||||
}
|
||||
|
||||
/* Default prot for loopback */
|
||||
if (!scifdev_self(ep->remote_dev))
|
||||
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
|
||||
|
||||
/*
|
||||
* VM_DONTCOPY - Do not copy this vma on fork
|
||||
* VM_DONTEXPAND - Cannot expand with mremap()
|
||||
* VM_RESERVED - Count as reserved_vm like IO
|
||||
* VM_PFNMAP - Page-ranges managed without "struct page"
|
||||
* VM_IO - Memory mapped I/O or similar
|
||||
*
|
||||
* We do not want to copy this VMA automatically on a fork(),
|
||||
* expand this VMA due to mremap() or swap out these pages since
|
||||
* the VMA is actually backed by physical pages in the remote
|
||||
* node's physical memory and not via a struct page.
|
||||
*/
|
||||
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
|
||||
|
||||
if (!scifdev_self(ep->remote_dev))
|
||||
vma->vm_flags |= VM_IO | VM_PFNMAP;
|
||||
|
||||
/* Map this range of windows */
|
||||
err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
|
||||
if (err) {
|
||||
dev_err(&ep->remote_dev->sdev->dev,
|
||||
"%s %d err %d\n", __func__, __LINE__, err);
|
||||
goto error_unlock;
|
||||
}
|
||||
/* Set up the driver call back */
|
||||
vma->vm_ops = &scif_vm_ops;
|
||||
vma->vm_private_data = vmapvt;
|
||||
error_unlock:
|
||||
mutex_unlock(&ep->rma_info.rma_lock);
|
||||
if (err) {
|
||||
kfree(vmapvt);
|
||||
dev_err(&ep->remote_dev->sdev->dev,
|
||||
"%s %d err %d\n", __func__, __LINE__, err);
|
||||
scif_delete_vma(ep, vma);
|
||||
}
|
||||
return err;
|
||||
}
|
Loading…
Reference in New Issue