habanalabs: Use single pool for CPU accessible host memory
The device's CPU-accessible memory on the host is managed in a dedicated pool, except for two regions - the Primary Queue (PQ) and the Event Queue (EQ) - which are allocated from generic DMA pools. Due to address length limitations of the CPU, the addresses of all these memory regions must have the same MSBs, starting at bit 40. This patch changes the allocation of the PQ and EQ so that they also come from the dedicated pool, to ensure compliance with this limitation. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
parent
a38693d775
commit
03d5f641dc
|
@ -321,6 +321,18 @@ struct hl_cs_job;
|
||||||
#define HL_EQ_LENGTH 64
|
#define HL_EQ_LENGTH 64
|
||||||
#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
|
#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
|
||||||
|
|
||||||
|
#define HL_CPU_PKT_SHIFT 5
|
||||||
|
#define HL_CPU_PKT_SIZE (1 << HL_CPU_PKT_SHIFT)
|
||||||
|
#define HL_CPU_PKT_MASK (~((1 << HL_CPU_PKT_SHIFT) - 1))
|
||||||
|
#define HL_CPU_MAX_PKTS_IN_CB 32
|
||||||
|
#define HL_CPU_CB_SIZE (HL_CPU_PKT_SIZE * \
|
||||||
|
HL_CPU_MAX_PKTS_IN_CB)
|
||||||
|
#define HL_CPU_CB_QUEUE_SIZE (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE)
|
||||||
|
|
||||||
|
/* KMD <-> ArmCP shared memory size (EQ + PQ + CPU CB queue) */
|
||||||
|
#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_EQ_SIZE_IN_BYTES + \
|
||||||
|
HL_QUEUE_SIZE_IN_BYTES + \
|
||||||
|
HL_CPU_CB_QUEUE_SIZE)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct hl_hw_queue - describes a H/W transport queue.
|
* struct hl_hw_queue - describes a H/W transport queue.
|
||||||
|
|
|
@ -415,14 +415,20 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ext_and_cpu_hw_queue_init(struct hl_device *hdev,
|
static int ext_and_cpu_hw_queue_init(struct hl_device *hdev,
|
||||||
struct hl_hw_queue *q)
|
struct hl_hw_queue *q, bool is_cpu_queue)
|
||||||
{
|
{
|
||||||
void *p;
|
void *p;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
p = hdev->asic_funcs->dma_alloc_coherent(hdev,
|
if (is_cpu_queue)
|
||||||
HL_QUEUE_SIZE_IN_BYTES,
|
p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
|
||||||
&q->bus_address, GFP_KERNEL | __GFP_ZERO);
|
HL_QUEUE_SIZE_IN_BYTES,
|
||||||
|
&q->bus_address);
|
||||||
|
else
|
||||||
|
p = hdev->asic_funcs->dma_alloc_coherent(hdev,
|
||||||
|
HL_QUEUE_SIZE_IN_BYTES,
|
||||||
|
&q->bus_address,
|
||||||
|
GFP_KERNEL | __GFP_ZERO);
|
||||||
if (!p)
|
if (!p)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
@ -446,8 +452,15 @@ static int ext_and_cpu_hw_queue_init(struct hl_device *hdev,
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
free_queue:
|
free_queue:
|
||||||
hdev->asic_funcs->dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES,
|
if (is_cpu_queue)
|
||||||
(void *) (uintptr_t) q->kernel_address, q->bus_address);
|
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
|
||||||
|
HL_QUEUE_SIZE_IN_BYTES,
|
||||||
|
(void *) (uintptr_t) q->kernel_address);
|
||||||
|
else
|
||||||
|
hdev->asic_funcs->dma_free_coherent(hdev,
|
||||||
|
HL_QUEUE_SIZE_IN_BYTES,
|
||||||
|
(void *) (uintptr_t) q->kernel_address,
|
||||||
|
q->bus_address);
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
@ -474,12 +487,12 @@ static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
|
||||||
|
|
||||||
static int cpu_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
|
static int cpu_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
|
||||||
{
|
{
|
||||||
return ext_and_cpu_hw_queue_init(hdev, q);
|
return ext_and_cpu_hw_queue_init(hdev, q, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
|
static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
|
||||||
{
|
{
|
||||||
return ext_and_cpu_hw_queue_init(hdev, q);
|
return ext_and_cpu_hw_queue_init(hdev, q, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -569,8 +582,15 @@ static void hw_queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
|
||||||
|
|
||||||
kfree(q->shadow_queue);
|
kfree(q->shadow_queue);
|
||||||
|
|
||||||
hdev->asic_funcs->dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES,
|
if (q->queue_type == QUEUE_TYPE_CPU)
|
||||||
(void *) (uintptr_t) q->kernel_address, q->bus_address);
|
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
|
||||||
|
HL_QUEUE_SIZE_IN_BYTES,
|
||||||
|
(void *) (uintptr_t) q->kernel_address);
|
||||||
|
else
|
||||||
|
hdev->asic_funcs->dma_free_coherent(hdev,
|
||||||
|
HL_QUEUE_SIZE_IN_BYTES,
|
||||||
|
(void *) (uintptr_t) q->kernel_address,
|
||||||
|
q->bus_address);
|
||||||
}
|
}
|
||||||
|
|
||||||
int hl_hw_queues_create(struct hl_device *hdev)
|
int hl_hw_queues_create(struct hl_device *hdev)
|
||||||
|
|
|
@ -300,14 +300,6 @@ enum armcp_pwm_attributes {
|
||||||
armcp_pwm_enable
|
armcp_pwm_enable
|
||||||
};
|
};
|
||||||
|
|
||||||
#define HL_CPU_PKT_SHIFT 5
|
|
||||||
#define HL_CPU_PKT_SIZE (1 << HL_CPU_PKT_SHIFT)
|
|
||||||
#define HL_CPU_PKT_MASK (~((1 << HL_CPU_PKT_SHIFT) - 1))
|
|
||||||
#define HL_CPU_MAX_PKTS_IN_CB 32
|
|
||||||
#define HL_CPU_CB_SIZE (HL_CPU_PKT_SIZE * \
|
|
||||||
HL_CPU_MAX_PKTS_IN_CB)
|
|
||||||
#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE)
|
|
||||||
|
|
||||||
/* Event Queue Packets */
|
/* Event Queue Packets */
|
||||||
|
|
||||||
struct eq_generic_event {
|
struct eq_generic_event {
|
||||||
|
|
|
@ -284,8 +284,9 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
|
||||||
|
|
||||||
BUILD_BUG_ON(HL_EQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
|
BUILD_BUG_ON(HL_EQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
|
||||||
|
|
||||||
p = hdev->asic_funcs->dma_alloc_coherent(hdev, HL_EQ_SIZE_IN_BYTES,
|
p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
|
||||||
&q->bus_address, GFP_KERNEL | __GFP_ZERO);
|
HL_EQ_SIZE_IN_BYTES,
|
||||||
|
&q->bus_address);
|
||||||
if (!p)
|
if (!p)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
@ -308,8 +309,9 @@ void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
|
||||||
{
|
{
|
||||||
flush_workqueue(hdev->eq_wq);
|
flush_workqueue(hdev->eq_wq);
|
||||||
|
|
||||||
hdev->asic_funcs->dma_free_coherent(hdev, HL_EQ_SIZE_IN_BYTES,
|
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
|
||||||
(void *) (uintptr_t) q->kernel_address, q->bus_address);
|
HL_EQ_SIZE_IN_BYTES,
|
||||||
|
(void *) (uintptr_t) q->kernel_address);
|
||||||
}
|
}
|
||||||
|
|
||||||
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
|
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
|
||||||
|
|
Loading…
Reference in New Issue