habanalabs: add context and ASID modules

This patch adds two modules - ASID and context.

Each user process that opens a device's file must have at least one
context before it is able to "work" with the device. Each context has its
own device address-space and contains information about its runtime state
(its active command submissions).

To have address-space separation between contexts, each context is assigned
a unique ASID, which stands for "address-space id". Goya supports up to
1024 ASIDs.

Currently, the driver doesn't support multiple contexts. Therefore, the
user doesn't need to actively create a context. A "primary context" is
created automatically when the user opens the device's file.

Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Oded Gabbay 2019-02-16 00:39:14 +02:00 committed by Greg Kroah-Hartman
parent 99b9d7b497
commit 0861e41de5
6 changed files with 374 additions and 3 deletions

View File

@ -4,7 +4,7 @@
obj-m := habanalabs.o
habanalabs-y := habanalabs_drv.o device.o
habanalabs-y := habanalabs_drv.o device.o context.o asid.o
include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)

View File

@ -0,0 +1,57 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2019 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include "habanalabs.h"
#include <linux/slab.h>
int hl_asid_init(struct hl_device *hdev)
{
hdev->asid_bitmap = kcalloc(BITS_TO_LONGS(hdev->asic_prop.max_asid),
sizeof(*hdev->asid_bitmap), GFP_KERNEL);
if (!hdev->asid_bitmap)
return -ENOMEM;
mutex_init(&hdev->asid_mutex);
/* ASID 0 is reserved for KMD */
set_bit(0, hdev->asid_bitmap);
return 0;
}
/*
 * hl_asid_fini - tear down the ASID allocator of a device
 *
 * @hdev: pointer to device structure
 *
 * Destroys the ASID mutex and frees the ASID bitmap.
 */
void hl_asid_fini(struct hl_device *hdev)
{
	unsigned long *bitmap = hdev->asid_bitmap;

	mutex_destroy(&hdev->asid_mutex);
	kfree(bitmap);
}
/*
 * hl_asid_alloc - allocate a free ASID
 *
 * @hdev: pointer to device structure
 *
 * Searches the ASID bitmap, under asid_mutex, for the first clear bit and
 * marks it as used.
 *
 * Return: the allocated ASID, or 0 if every ASID is already in use.
 */
unsigned long hl_asid_alloc(struct hl_device *hdev)
{
	unsigned long asid;

	mutex_lock(&hdev->asid_mutex);

	asid = find_first_zero_bit(hdev->asid_bitmap,
					hdev->asic_prop.max_asid);
	if (asid != hdev->asic_prop.max_asid)
		set_bit(asid, hdev->asid_bitmap);
	else
		asid = 0; /* bitmap is full; 0 signals failure to the caller */

	mutex_unlock(&hdev->asid_mutex);

	return asid;
}
void hl_asid_free(struct hl_device *hdev, unsigned long asid)
{
if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid),
"Invalid ASID %lu", asid))
return;
clear_bit(asid, hdev->asid_bitmap);
}

View File

@ -0,0 +1,154 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2019 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include "habanalabs.h"
#include <linux/slab.h>
/*
 * hl_ctx_fini - release the resources held by a context
 *
 * @ctx: pointer to the context structure
 *
 * Returns the context's ASID to the allocator, unless the context uses the
 * kernel ASID, which is not handed out by the allocator.
 */
static void hl_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	hl_asid_free(ctx->hdev, ctx->asid);
}
/*
 * hl_ctx_do_release - kref release callback of a context
 *
 * @ref: pointer to the refcount member embedded in the context
 *
 * Finalizes the context, drops the reference it holds on the owning hpriv
 * (if it has one) and frees the context memory.
 */
void hl_ctx_do_release(struct kref *ref)
{
	struct hl_ctx *ctx = container_of(ref, struct hl_ctx, refcount);
	struct hl_fpriv *hpriv;

	dev_dbg(ctx->hdev->dev, "Now really releasing context %d\n", ctx->asid);

	hl_ctx_fini(ctx);

	/* Read hpriv before the context memory is freed below */
	hpriv = ctx->hpriv;
	if (hpriv)
		hl_hpriv_put(hpriv);

	kfree(ctx);
}
/*
 * hl_ctx_create - create a user context for an open file
 *
 * @hdev: pointer to device structure
 * @hpriv: pointer to the private data of the process (fd)
 *
 * Allocates and initializes a context, takes a reference on hpriv on its
 * behalf and registers it in the context manager's IDR. Only after all of
 * these steps succeed is the context published through hpriv->ctx and
 * hdev->user_ctx, so a failure never leaves those pointers referring to
 * freed memory.
 *
 * Return: 0 on success, -ENOMEM if the context cannot be allocated, or the
 * negative error code returned by hl_ctx_init() or idr_alloc().
 */
int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
{
	struct hl_ctx_mgr *mgr = &hpriv->ctx_mgr;
	struct hl_ctx *ctx;
	int rc;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		rc = -ENOMEM;
		goto out_err;
	}

	rc = hl_ctx_init(hdev, ctx, false);
	if (rc)
		goto free_ctx;

	/* The context keeps hpriv alive until hl_ctx_do_release() runs */
	hl_hpriv_get(hpriv);
	ctx->hpriv = hpriv;

	mutex_lock(&mgr->ctx_lock);
	rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
	mutex_unlock(&mgr->ctx_lock);

	if (rc < 0) {
		dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
		/*
		 * Dropping the initial reference releases the ASID, puts
		 * hpriv and frees ctx. Nothing else points at ctx yet.
		 */
		hl_ctx_free(hdev, ctx);
		goto out_err;
	}

	/* TODO: remove for multiple contexts */
	hpriv->ctx = ctx;
	hdev->user_ctx = ctx;

	return 0;

free_ctx:
	kfree(ctx);
out_err:
	return rc;
}
/*
 * hl_ctx_free - drop a reference on a context when it is closed
 *
 * @hdev: pointer to device structure
 * @ctx: pointer to the context structure
 *
 * Puts one reference on the context. If that put did not release the
 * context, a warning is printed because the context is still referenced.
 */
void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
{
	int released = kref_put(&ctx->refcount, hl_ctx_do_release);

	if (released != 1)
		dev_warn(hdev->dev,
			"Context %d closed or terminated but its CS are executing\n",
			ctx->asid);
}
/*
 * hl_ctx_init - initialize a context structure
 *
 * @hdev: pointer to device structure
 * @ctx: pointer to the context structure to initialize
 * @is_kernel_ctx: true for the kernel (KMD) context, false for a user context
 *
 * Binds the context to the device, initializes its refcount and assigns its
 * ASID: the kernel context gets HL_KERNEL_ASID_ID, any other context gets a
 * newly allocated ASID.
 *
 * Return: 0 on success, -ENOMEM if no free ASID is available.
 */
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
	ctx->hdev = hdev;
	kref_init(&ctx->refcount);

	if (!is_kernel_ctx) {
		ctx->asid = hl_asid_alloc(hdev);
		if (ctx->asid == 0) {
			dev_err(hdev->dev, "No free ASID, failed to create context\n");
			return -ENOMEM;
		}
	} else {
		/* KMD gets ASID 0 */
		ctx->asid = HL_KERNEL_ASID_ID;
	}

	dev_dbg(hdev->dev, "Created context with ASID %u\n", ctx->asid);

	return 0;
}
/*
 * hl_ctx_get - take a reference on a context
 *
 * @hdev: pointer to device structure (not used by this function)
 * @ctx: pointer to the context whose refcount is incremented
 */
void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx)
{
kref_get(&ctx->refcount);
}
/*
 * hl_ctx_put - drop a reference on a context
 *
 * @ctx: pointer to the context structure
 *
 * hl_ctx_do_release() is passed as the release callback, so it runs when
 * this put drops the last reference.
 *
 * Return: the value returned by kref_put(). Callers in this driver compare
 * it against 1 to learn whether the context was released.
 */
int hl_ctx_put(struct hl_ctx *ctx)
{
return kref_put(&ctx->refcount, hl_ctx_do_release);
}
/*
 * hl_ctx_mgr_init - prepare a context manager for use
 *
 * @mgr: pointer to context manager structure
 *
 * The manager lives inside the hpriv object of the user process and is set
 * up when that process opens the FD. Initializes the IDR that stores the
 * context handles and the mutex that protects it.
 */
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr)
{
	idr_init(&mgr->ctx_handles);
	mutex_init(&mgr->ctx_lock);
}
/*
 * hl_ctx_mgr_fini - tear down a context manager
 *
 * @hdev: pointer to device structure
 * @mgr: pointer to context manager structure
 *
 * Walks every context registered in the manager and drops a reference on
 * each one through hl_ctx_free(), then destroys the IDR and the mutex.
 * Called when a process closes the FD.
 */
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
{
	struct idr *handles = &mgr->ctx_handles;
	struct hl_ctx *cur;
	u32 handle;

	idr_for_each_entry(handles, cur, handle)
		hl_ctx_free(hdev, cur);

	idr_destroy(handles);
	mutex_destroy(&mgr->ctx_lock);
}

View File

@ -22,6 +22,12 @@ static void hpriv_release(struct kref *ref)
put_pid(hpriv->taskpid);
kfree(hpriv);
/* Now the FD is really closed */
atomic_dec(&hdev->fd_open_cnt);
/* This allows a new user context to open the device */
hdev->user_ctx = NULL;
}
void hl_hpriv_get(struct hl_fpriv *hpriv)
@ -46,6 +52,8 @@ static int hl_device_release(struct inode *inode, struct file *filp)
{
struct hl_fpriv *hpriv = filp->private_data;
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL;
hl_hpriv_put(hpriv);
@ -137,7 +145,20 @@ static int device_early_init(struct hl_device *hdev)
if (rc)
return rc;
rc = hl_asid_init(hdev);
if (rc)
goto early_fini;
mutex_init(&hdev->fd_open_cnt_lock);
atomic_set(&hdev->fd_open_cnt, 0);
return 0;
early_fini:
if (hdev->asic_funcs->early_fini)
hdev->asic_funcs->early_fini(hdev);
return rc;
}
/*
@ -149,9 +170,12 @@ static int device_early_init(struct hl_device *hdev)
static void device_early_fini(struct hl_device *hdev)
{
hl_asid_fini(hdev);
if (hdev->asic_funcs->early_fini)
hdev->asic_funcs->early_fini(hdev);
mutex_destroy(&hdev->fd_open_cnt_lock);
}
/*
@ -245,11 +269,30 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
if (rc)
goto early_fini;
/* Allocate the kernel context */
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
if (!hdev->kernel_ctx) {
rc = -ENOMEM;
goto sw_fini;
}
hdev->user_ctx = NULL;
rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
if (rc) {
dev_err(hdev->dev, "failed to initialize kernel context\n");
goto free_ctx;
}
dev_notice(hdev->dev,
"Successfully added device to habanalabs driver\n");
return 0;
free_ctx:
kfree(hdev->kernel_ctx);
sw_fini:
hdev->asic_funcs->sw_fini(hdev);
early_fini:
device_early_fini(hdev);
release_device:
@ -282,6 +325,10 @@ void hl_device_fini(struct hl_device *hdev)
/* Mark device as disabled */
hdev->disabled = true;
/* Release kernel context */
if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
dev_err(hdev->dev, "kernel ctx is still alive\n");
/* Call ASIC S/W finalize function */
hdev->asic_funcs->sw_fini(hdev);

View File

@ -70,6 +70,8 @@ struct asic_fixed_properties {
#define HL_QUEUE_LENGTH 256
/*
* ASICs
*/
@ -117,6 +119,39 @@ struct hl_asic_funcs {
void *cpu_addr, dma_addr_t dma_handle);
};
/*
* CONTEXTS
*/
#define HL_KERNEL_ASID_ID 0
/**
 * struct hl_ctx - user/kernel context.
 * @hpriv: pointer to the private (KMD) data of the process (fd). Set by
 *         hl_ctx_create(); hl_ctx_do_release() skips the hpriv put when
 *         this is NULL.
 * @hdev: pointer to the device structure.
 * @refcount: reference counter for the context. Context is released only when
 *		this hits 0. It is incremented on CS and CS_WAIT.
 * @asid: context's unique address space ID in the device's MMU. It is
 *        HL_KERNEL_ASID_ID for the kernel context.
 */
struct hl_ctx {
struct hl_fpriv *hpriv;
struct hl_device *hdev;
struct kref refcount;
u32 asid;
};
/**
 * struct hl_ctx_mgr - for handling multiple contexts.
 * @ctx_lock: protects ctx_handles; held by hl_ctx_create() around the
 *            idr_alloc() call.
 * @ctx_handles: idr to hold all ctx handles. Handles are allocated starting
 *               from 1.
 */
struct hl_ctx_mgr {
struct mutex ctx_lock;
struct idr ctx_handles;
};
/*
* FILE PRIVATE STRUCTURE
*/
@ -126,12 +161,16 @@ struct hl_asic_funcs {
* @hdev: habanalabs device structure.
* @filp: pointer to the given file structure.
* @taskpid: current process ID.
* @ctx: current executing context.
* @ctx_mgr: context manager to handle multiple contexts for this FD.
* @refcount: number of related contexts.
*/
struct hl_fpriv {
struct hl_device *hdev;
struct file *filp;
struct pid *taskpid;
struct hl_ctx *ctx; /* TODO: remove for multiple ctx */
struct hl_ctx_mgr ctx_mgr;
struct kref refcount;
};
@ -185,13 +224,24 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
* @dev: related kernel basic device structure.
* @asic_name: ASIC specific name.
* @asic_type: ASIC specific type.
* @kernel_ctx: KMD context structure.
* @dma_pool: DMA pool for small allocations.
* @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address.
* @cpu_accessible_dma_address: KMD <-> ArmCP shared memory DMA address.
* @cpu_accessible_dma_pool: KMD <-> ArmCP shared memory pool.
* @asid_bitmap: holds used/available ASIDs.
* @asid_mutex: protects asid_bitmap.
* @fd_open_cnt_lock: lock for updating fd_open_cnt in hl_device_open. Although
* fd_open_cnt is atomic, we need this lock to serialize
* the open function because the driver currently supports
* only a single process at a time. In addition, we need a
* lock here so we can flush user processes which are opening
* the device while we are trying to hard reset it
* @asic_prop: ASIC specific immutable properties.
* @asic_funcs: ASIC specific functions.
* @asic_specific: ASIC specific information to use only from ASIC files.
* @user_ctx: current user context executing.
* @fd_open_cnt: number of open user processes.
* @major: habanalabs KMD major.
* @id: device minor.
* @disabled: is device disabled.
@ -204,13 +254,21 @@ struct hl_device {
struct device *dev;
char asic_name[16];
enum hl_asic_type asic_type;
struct hl_ctx *kernel_ctx;
struct dma_pool *dma_pool;
void *cpu_accessible_dma_mem;
dma_addr_t cpu_accessible_dma_address;
struct gen_pool *cpu_accessible_dma_pool;
unsigned long *asid_bitmap;
struct mutex asid_mutex;
/* TODO: remove fd_open_cnt_lock for multiple process support */
struct mutex fd_open_cnt_lock;
struct asic_fixed_properties asic_prop;
const struct hl_asic_funcs *asic_funcs;
void *asic_specific;
/* TODO: remove user_ctx for multiple process support */
struct hl_ctx *user_ctx;
atomic_t fd_open_cnt;
u32 major;
u16 id;
u8 disabled;
@ -258,10 +316,23 @@ int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr, u32 timeout_us,
int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
u32 timeout_us, u32 *val);
int hl_asid_init(struct hl_device *hdev);
void hl_asid_fini(struct hl_device *hdev);
unsigned long hl_asid_alloc(struct hl_device *hdev);
void hl_asid_free(struct hl_device *hdev, unsigned long asid);
int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
int hl_ctx_put(struct hl_ctx *ctx);
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
int hl_device_init(struct hl_device *hdev, struct class *hclass);
void hl_device_fini(struct hl_device *hdev);
int hl_device_suspend(struct hl_device *hdev);
int hl_device_resume(struct hl_device *hdev);
void hl_hpriv_get(struct hl_fpriv *hpriv);
void hl_hpriv_put(struct hl_fpriv *hpriv);
void goya_set_asic_funcs(struct hl_device *hdev);

View File

@ -70,6 +70,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
{
struct hl_device *hdev;
struct hl_fpriv *hpriv;
int rc;
mutex_lock(&hl_devs_idr_lock);
hdev = idr_find(&hl_devs_idr, iminor(inode));
@ -81,9 +82,33 @@ int hl_device_open(struct inode *inode, struct file *filp)
return -ENXIO;
}
mutex_lock(&hdev->fd_open_cnt_lock);
if (hdev->disabled) {
dev_err_ratelimited(hdev->dev,
"Can't open %s because it is disabled\n",
dev_name(hdev->dev));
mutex_unlock(&hdev->fd_open_cnt_lock);
return -EPERM;
}
if (atomic_read(&hdev->fd_open_cnt)) {
dev_info_ratelimited(hdev->dev,
"Device %s is already attached to application\n",
dev_name(hdev->dev));
mutex_unlock(&hdev->fd_open_cnt_lock);
return -EBUSY;
}
atomic_inc(&hdev->fd_open_cnt);
mutex_unlock(&hdev->fd_open_cnt_lock);
hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
if (!hpriv)
return -ENOMEM;
if (!hpriv) {
rc = -ENOMEM;
goto close_device;
}
hpriv->hdev = hdev;
filp->private_data = hpriv;
@ -91,9 +116,26 @@ int hl_device_open(struct inode *inode, struct file *filp)
kref_init(&hpriv->refcount);
nonseekable_open(inode, filp);
hl_ctx_mgr_init(&hpriv->ctx_mgr);
rc = hl_ctx_create(hdev, hpriv);
if (rc) {
dev_err(hdev->dev, "Failed to open FD (CTX fail)\n");
goto out_err;
}
hpriv->taskpid = find_get_pid(current->pid);
return 0;
out_err:
filp->private_data = NULL;
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
kfree(hpriv);
close_device:
atomic_dec(&hdev->fd_open_cnt);
return rc;
}
/*