OpenCloudOS-Kernel/drivers/gpu/drm/etnaviv/etnaviv_gpu.c

1784 lines
44 KiB
C
Raw Normal View History

/*
* Copyright (C) 2015 Etnaviv Project
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/component.h>
#include <linux/fence.h>
#include <linux/moduleparam.h>
#include <linux/of_device.h>
#include "etnaviv_dump.h"
#include "etnaviv_gpu.h"
#include "etnaviv_gem.h"
#include "etnaviv_mmu.h"
#include "etnaviv_iommu.h"
#include "etnaviv_iommu_v2.h"
#include "common.xml.h"
#include "state.xml.h"
#include "state_hi.xml.h"
#include "cmdstream.xml.h"
static const struct platform_device_id gpu_ids[] = {
{ .name = "etnaviv-gpu,2d" },
{ },
};
static bool etnaviv_dump_core = true;
module_param_named(dump_core, etnaviv_dump_core, bool, 0600);
/*
* Driver functions:
*/
int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
{
switch (param) {
case ETNAVIV_PARAM_GPU_MODEL:
*value = gpu->identity.model;
break;
case ETNAVIV_PARAM_GPU_REVISION:
*value = gpu->identity.revision;
break;
case ETNAVIV_PARAM_GPU_FEATURES_0:
*value = gpu->identity.features;
break;
case ETNAVIV_PARAM_GPU_FEATURES_1:
*value = gpu->identity.minor_features0;
break;
case ETNAVIV_PARAM_GPU_FEATURES_2:
*value = gpu->identity.minor_features1;
break;
case ETNAVIV_PARAM_GPU_FEATURES_3:
*value = gpu->identity.minor_features2;
break;
case ETNAVIV_PARAM_GPU_FEATURES_4:
*value = gpu->identity.minor_features3;
break;
case ETNAVIV_PARAM_GPU_FEATURES_5:
*value = gpu->identity.minor_features4;
break;
case ETNAVIV_PARAM_GPU_FEATURES_6:
*value = gpu->identity.minor_features5;
break;
case ETNAVIV_PARAM_GPU_STREAM_COUNT:
*value = gpu->identity.stream_count;
break;
case ETNAVIV_PARAM_GPU_REGISTER_MAX:
*value = gpu->identity.register_max;
break;
case ETNAVIV_PARAM_GPU_THREAD_COUNT:
*value = gpu->identity.thread_count;
break;
case ETNAVIV_PARAM_GPU_VERTEX_CACHE_SIZE:
*value = gpu->identity.vertex_cache_size;
break;
case ETNAVIV_PARAM_GPU_SHADER_CORE_COUNT:
*value = gpu->identity.shader_core_count;
break;
case ETNAVIV_PARAM_GPU_PIXEL_PIPES:
*value = gpu->identity.pixel_pipes;
break;
case ETNAVIV_PARAM_GPU_VERTEX_OUTPUT_BUFFER_SIZE:
*value = gpu->identity.vertex_output_buffer_size;
break;
case ETNAVIV_PARAM_GPU_BUFFER_SIZE:
*value = gpu->identity.buffer_size;
break;
case ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT:
*value = gpu->identity.instruction_count;
break;
case ETNAVIV_PARAM_GPU_NUM_CONSTANTS:
*value = gpu->identity.num_constants;
break;
case ETNAVIV_PARAM_GPU_NUM_VARYINGS:
*value = gpu->identity.varyings_count;
break;
default:
DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
return -EINVAL;
}
return 0;
}
#define etnaviv_is_model_rev(gpu, mod, rev) \
((gpu)->identity.model == chipModel_##mod && \
(gpu)->identity.revision == rev)
#define etnaviv_field(val, field) \
(((val) & field##__MASK) >> field##__SHIFT)
static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
{
if (gpu->identity.minor_features0 &
chipMinorFeatures0_MORE_MINOR_FEATURES) {
u32 specs[4];
unsigned int streams;
specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS);
specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2);
specs[2] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_3);
specs[3] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_4);
gpu->identity.stream_count = etnaviv_field(specs[0],
VIVS_HI_CHIP_SPECS_STREAM_COUNT);
gpu->identity.register_max = etnaviv_field(specs[0],
VIVS_HI_CHIP_SPECS_REGISTER_MAX);
gpu->identity.thread_count = etnaviv_field(specs[0],
VIVS_HI_CHIP_SPECS_THREAD_COUNT);
gpu->identity.vertex_cache_size = etnaviv_field(specs[0],
VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE);
gpu->identity.shader_core_count = etnaviv_field(specs[0],
VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT);
gpu->identity.pixel_pipes = etnaviv_field(specs[0],
VIVS_HI_CHIP_SPECS_PIXEL_PIPES);
gpu->identity.vertex_output_buffer_size =
etnaviv_field(specs[0],
VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE);
gpu->identity.buffer_size = etnaviv_field(specs[1],
VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE);
gpu->identity.instruction_count = etnaviv_field(specs[1],
VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT);
gpu->identity.num_constants = etnaviv_field(specs[1],
VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS);
gpu->identity.varyings_count = etnaviv_field(specs[2],
VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT);
/* This overrides the value from older register if non-zero */
streams = etnaviv_field(specs[3],
VIVS_HI_CHIP_SPECS_4_STREAM_COUNT);
if (streams)
gpu->identity.stream_count = streams;
}
/* Fill in the stream count if not specified */
if (gpu->identity.stream_count == 0) {
if (gpu->identity.model >= 0x1000)
gpu->identity.stream_count = 4;
else
gpu->identity.stream_count = 1;
}
/* Convert the register max value */
if (gpu->identity.register_max)
gpu->identity.register_max = 1 << gpu->identity.register_max;
else if (gpu->identity.model == chipModel_GC400)
gpu->identity.register_max = 32;
else
gpu->identity.register_max = 64;
/* Convert thread count */
if (gpu->identity.thread_count)
gpu->identity.thread_count = 1 << gpu->identity.thread_count;
else if (gpu->identity.model == chipModel_GC400)
gpu->identity.thread_count = 64;
else if (gpu->identity.model == chipModel_GC500 ||
gpu->identity.model == chipModel_GC530)
gpu->identity.thread_count = 128;
else
gpu->identity.thread_count = 256;
if (gpu->identity.vertex_cache_size == 0)
gpu->identity.vertex_cache_size = 8;
if (gpu->identity.shader_core_count == 0) {
if (gpu->identity.model >= 0x1000)
gpu->identity.shader_core_count = 2;
else
gpu->identity.shader_core_count = 1;
}
if (gpu->identity.pixel_pipes == 0)
gpu->identity.pixel_pipes = 1;
/* Convert virtex buffer size */
if (gpu->identity.vertex_output_buffer_size) {
gpu->identity.vertex_output_buffer_size =
1 << gpu->identity.vertex_output_buffer_size;
} else if (gpu->identity.model == chipModel_GC400) {
if (gpu->identity.revision < 0x4000)
gpu->identity.vertex_output_buffer_size = 512;
else if (gpu->identity.revision < 0x4200)
gpu->identity.vertex_output_buffer_size = 256;
else
gpu->identity.vertex_output_buffer_size = 128;
} else {
gpu->identity.vertex_output_buffer_size = 512;
}
switch (gpu->identity.instruction_count) {
case 0:
if (etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
gpu->identity.model == chipModel_GC880)
gpu->identity.instruction_count = 512;
else
gpu->identity.instruction_count = 256;
break;
case 1:
gpu->identity.instruction_count = 1024;
break;
case 2:
gpu->identity.instruction_count = 2048;
break;
default:
gpu->identity.instruction_count = 256;
break;
}
if (gpu->identity.num_constants == 0)
gpu->identity.num_constants = 168;
if (gpu->identity.varyings_count == 0) {
if (gpu->identity.minor_features1 & chipMinorFeatures1_HALTI0)
gpu->identity.varyings_count = 12;
else
gpu->identity.varyings_count = 8;
}
/*
* For some cores, two varyings are consumed for position, so the
* maximum varying count needs to be reduced by one.
*/
if (etnaviv_is_model_rev(gpu, GC5000, 0x5434) ||
etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
etnaviv_is_model_rev(gpu, GC4000, 0x5245) ||
etnaviv_is_model_rev(gpu, GC4000, 0x5208) ||
etnaviv_is_model_rev(gpu, GC3000, 0x5435) ||
etnaviv_is_model_rev(gpu, GC2200, 0x5244) ||
etnaviv_is_model_rev(gpu, GC2100, 0x5108) ||
etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
etnaviv_is_model_rev(gpu, GC1500, 0x5246) ||
etnaviv_is_model_rev(gpu, GC880, 0x5107) ||
etnaviv_is_model_rev(gpu, GC880, 0x5106))
gpu->identity.varyings_count -= 1;
}
static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
{
u32 chipIdentity;
chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY);
/* Special case for older graphic cores. */
if (etnaviv_field(chipIdentity, VIVS_HI_CHIP_IDENTITY_FAMILY) == 0x01) {
gpu->identity.model = chipModel_GC500;
gpu->identity.revision = etnaviv_field(chipIdentity,
VIVS_HI_CHIP_IDENTITY_REVISION);
} else {
gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
gpu->identity.revision = gpu_read(gpu, VIVS_HI_CHIP_REV);
/*
* !!!! HACK ALERT !!!!
* Because people change device IDs without letting software
* know about it - here is the hack to make it all look the
* same. Only for GC400 family.
*/
if ((gpu->identity.model & 0xff00) == 0x0400 &&
gpu->identity.model != chipModel_GC420) {
gpu->identity.model = gpu->identity.model & 0x0400;
}
/* Another special case */
if (etnaviv_is_model_rev(gpu, GC300, 0x2201)) {
u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
if (chipDate == 0x20080814 && chipTime == 0x12051100) {
/*
* This IP has an ECO; put the correct
* revision in it.
*/
gpu->identity.revision = 0x1051;
}
}
}
dev_info(gpu->dev, "model: GC%x, revision: %x\n",
gpu->identity.model, gpu->identity.revision);
gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE);
/* Disable fast clear on GC700. */
if (gpu->identity.model == chipModel_GC700)
gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
if ((gpu->identity.model == chipModel_GC500 &&
gpu->identity.revision < 2) ||
(gpu->identity.model == chipModel_GC300 &&
gpu->identity.revision < 0x2000)) {
/*
* GC500 rev 1.x and GC300 rev < 2.0 doesn't have these
* registers.
*/
gpu->identity.minor_features0 = 0;
gpu->identity.minor_features1 = 0;
gpu->identity.minor_features2 = 0;
gpu->identity.minor_features3 = 0;
gpu->identity.minor_features4 = 0;
gpu->identity.minor_features5 = 0;
} else
gpu->identity.minor_features0 =
gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0);
if (gpu->identity.minor_features0 &
chipMinorFeatures0_MORE_MINOR_FEATURES) {
gpu->identity.minor_features1 =
gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_1);
gpu->identity.minor_features2 =
gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2);
gpu->identity.minor_features3 =
gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3);
gpu->identity.minor_features4 =
gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_4);
gpu->identity.minor_features5 =
gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5);
}
/* GC600 idle register reports zero bits where modules aren't present */
if (gpu->identity.model == chipModel_GC600) {
gpu->idle_mask = VIVS_HI_IDLE_STATE_TX |
VIVS_HI_IDLE_STATE_RA |
VIVS_HI_IDLE_STATE_SE |
VIVS_HI_IDLE_STATE_PA |
VIVS_HI_IDLE_STATE_SH |
VIVS_HI_IDLE_STATE_PE |
VIVS_HI_IDLE_STATE_DE |
VIVS_HI_IDLE_STATE_FE;
} else {
gpu->idle_mask = ~VIVS_HI_IDLE_STATE_AXI_LP;
}
etnaviv_hw_specs(gpu);
}
static void etnaviv_gpu_load_clock(struct etnaviv_gpu *gpu, u32 clock)
{
gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, clock |
VIVS_HI_CLOCK_CONTROL_FSCALE_CMD_LOAD);
gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, clock);
}
static int etnaviv_hw_reset(struct etnaviv_gpu *gpu)
{
u32 control, idle;
unsigned long timeout;
bool failed = true;
/* TODO
*
* - clock gating
* - puls eater
* - what about VG?
*/
/* We hope that the GPU resets in under one second */
timeout = jiffies + msecs_to_jiffies(1000);
while (time_is_after_jiffies(timeout)) {
control = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS |
VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(0x40);
/* enable clock */
etnaviv_gpu_load_clock(gpu, control);
/* Wait for stable clock. Vivante's code waited for 1ms */
usleep_range(1000, 10000);
/* isolate the GPU. */
control |= VIVS_HI_CLOCK_CONTROL_ISOLATE_GPU;
gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
/* set soft reset. */
control |= VIVS_HI_CLOCK_CONTROL_SOFT_RESET;
gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
/* wait for reset. */
msleep(1);
/* reset soft reset bit. */
control &= ~VIVS_HI_CLOCK_CONTROL_SOFT_RESET;
gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
/* reset GPU isolation. */
control &= ~VIVS_HI_CLOCK_CONTROL_ISOLATE_GPU;
gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
/* read idle register. */
idle = gpu_read(gpu, VIVS_HI_IDLE_STATE);
/* try reseting again if FE it not idle */
if ((idle & VIVS_HI_IDLE_STATE_FE) == 0) {
dev_dbg(gpu->dev, "FE is not idle\n");
continue;
}
/* read reset register. */
control = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL);
/* is the GPU idle? */
if (((control & VIVS_HI_CLOCK_CONTROL_IDLE_3D) == 0) ||
((control & VIVS_HI_CLOCK_CONTROL_IDLE_2D) == 0)) {
dev_dbg(gpu->dev, "GPU is not idle\n");
continue;
}
failed = false;
break;
}
if (failed) {
idle = gpu_read(gpu, VIVS_HI_IDLE_STATE);
control = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL);
dev_err(gpu->dev, "GPU failed to reset: FE %sidle, 3D %sidle, 2D %sidle\n",
idle & VIVS_HI_IDLE_STATE_FE ? "" : "not ",
control & VIVS_HI_CLOCK_CONTROL_IDLE_3D ? "" : "not ",
control & VIVS_HI_CLOCK_CONTROL_IDLE_2D ? "" : "not ");
return -EBUSY;
}
/* We rely on the GPU running, so program the clock */
control = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS |
VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(0x40);
/* enable clock */
etnaviv_gpu_load_clock(gpu, control);
return 0;
}
static void etnaviv_gpu_enable_mlcg(struct etnaviv_gpu *gpu)
{
u32 pmc, ppc;
/* enable clock gating */
ppc = gpu_read(gpu, VIVS_PM_POWER_CONTROLS);
ppc |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
/* Disable stall module clock gating for 4.3.0.1 and 4.3.0.2 revs */
if (gpu->identity.revision == 0x4301 ||
gpu->identity.revision == 0x4302)
ppc |= VIVS_PM_POWER_CONTROLS_DISABLE_STALL_MODULE_CLOCK_GATING;
gpu_write(gpu, VIVS_PM_POWER_CONTROLS, ppc);
pmc = gpu_read(gpu, VIVS_PM_MODULE_CONTROLS);
/* Disable PA clock gating for GC400+ except for GC420 */
if (gpu->identity.model >= chipModel_GC400 &&
gpu->identity.model != chipModel_GC420)
pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PA;
/*
* Disable PE clock gating on revs < 5.0.0.0 when HZ is
* present without a bug fix.
*/
if (gpu->identity.revision < 0x5000 &&
gpu->identity.minor_features0 & chipMinorFeatures0_HZ &&
!(gpu->identity.minor_features1 &
chipMinorFeatures1_DISABLE_PE_GATING))
pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PE;
if (gpu->identity.revision < 0x5422)
pmc |= BIT(15); /* Unknown bit */
pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_HZ;
pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_EZ;
gpu_write(gpu, VIVS_PM_MODULE_CONTROLS, pmc);
}
static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
{
u16 prefetch;
if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) ||
etnaviv_is_model_rev(gpu, GC320, 0x5220)) &&
gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) {
u32 mc_memory_debug;
mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff;
if (gpu->identity.revision == 0x5007)
mc_memory_debug |= 0x0c;
else
mc_memory_debug |= 0x08;
gpu_write(gpu, VIVS_MC_DEBUG_MEMORY, mc_memory_debug);
}
/* enable module-level clock gating */
etnaviv_gpu_enable_mlcg(gpu);
/*
* Update GPU AXI cache atttribute to "cacheable, no allocate".
* This is necessary to prevent the iMX6 SoC locking up.
*/
gpu_write(gpu, VIVS_HI_AXI_CONFIG,
VIVS_HI_AXI_CONFIG_AWCACHE(2) |
VIVS_HI_AXI_CONFIG_ARCACHE(2));
/* GC2000 rev 5108 needs a special bus config */
if (etnaviv_is_model_rev(gpu, GC2000, 0x5108)) {
u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);
bus_config |= VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG(1) |
VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG(0);
gpu_write(gpu, VIVS_MC_BUS_CONFIG, bus_config);
}
/* set base addresses */
gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, gpu->memory_base);
gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, gpu->memory_base);
gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_TX, gpu->memory_base);
gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PEZ, gpu->memory_base);
gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PE, gpu->memory_base);
/* setup the MMU page table pointers */
etnaviv_iommu_domain_restore(gpu, gpu->mmu->domain);
/* Start command processor */
prefetch = etnaviv_buffer_init(gpu);
gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U);
gpu_write(gpu, VIVS_FE_COMMAND_ADDRESS,
gpu->buffer->paddr - gpu->memory_base);
gpu_write(gpu, VIVS_FE_COMMAND_CONTROL,
VIVS_FE_COMMAND_CONTROL_ENABLE |
VIVS_FE_COMMAND_CONTROL_PREFETCH(prefetch));
}
int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
{
int ret, i;
struct iommu_domain *iommu;
enum etnaviv_iommu_version version;
bool mmuv2;
ret = pm_runtime_get_sync(gpu->dev);
if (ret < 0) {
dev_err(gpu->dev, "Failed to enable GPU power domain\n");
return ret;
}
etnaviv_hw_identify(gpu);
if (gpu->identity.model == 0) {
dev_err(gpu->dev, "Unknown GPU model\n");
ret = -ENXIO;
goto fail;
}
/* Exclude VG cores with FE2.0 */
if (gpu->identity.features & chipFeatures_PIPE_VG &&
gpu->identity.features & chipFeatures_FE20) {
dev_info(gpu->dev, "Ignoring GPU with VG and FE2.0\n");
ret = -ENXIO;
goto fail;
}
/*
* Set the GPU linear window to be at the end of the DMA window, where
* the CMA area is likely to reside. This ensures that we are able to
* map the command buffers while having the linear window overlap as
* much RAM as possible, so we can optimize mappings for other buffers.
*
* For 3D cores only do this if MC2.0 is present, as with MC1.0 it leads
* to different views of the memory on the individual engines.
*/
if (!(gpu->identity.features & chipFeatures_PIPE_3D) ||
(gpu->identity.minor_features0 & chipMinorFeatures0_MC20)) {
u32 dma_mask = (u32)dma_get_required_mask(gpu->dev);
if (dma_mask < PHYS_OFFSET + SZ_2G)
gpu->memory_base = PHYS_OFFSET;
else
gpu->memory_base = dma_mask - SZ_2G + 1;
}
ret = etnaviv_hw_reset(gpu);
if (ret) {
dev_err(gpu->dev, "GPU reset failed\n");
goto fail;
}
/* Setup IOMMU.. eventually we will (I think) do this once per context
* and have separate page tables per context. For now, to keep things
* simple and to get something working, just use a single address space:
*/
mmuv2 = gpu->identity.minor_features1 & chipMinorFeatures1_MMU_VERSION;
dev_dbg(gpu->dev, "mmuv2: %d\n", mmuv2);
if (!mmuv2) {
iommu = etnaviv_iommu_domain_alloc(gpu);
version = ETNAVIV_IOMMU_V1;
} else {
iommu = etnaviv_iommu_v2_domain_alloc(gpu);
version = ETNAVIV_IOMMU_V2;
}
if (!iommu) {
dev_err(gpu->dev, "Failed to allocate GPU IOMMU domain\n");
ret = -ENOMEM;
goto fail;
}
gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
if (!gpu->mmu) {
dev_err(gpu->dev, "Failed to instantiate GPU IOMMU\n");
iommu_domain_free(iommu);
ret = -ENOMEM;
goto fail;
}
/* Create buffer: */
gpu->buffer = etnaviv_gpu_cmdbuf_new(gpu, PAGE_SIZE, 0);
if (!gpu->buffer) {
ret = -ENOMEM;
dev_err(gpu->dev, "could not create command buffer\n");
goto destroy_iommu;
}
if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
ret = -EINVAL;
dev_err(gpu->dev,
"command buffer outside valid memory window\n");
goto free_buffer;
}
/* Setup event management */
spin_lock_init(&gpu->event_spinlock);
init_completion(&gpu->event_free);
for (i = 0; i < ARRAY_SIZE(gpu->event); i++) {
gpu->event[i].used = false;
complete(&gpu->event_free);
}
/* Now program the hardware */
mutex_lock(&gpu->lock);
etnaviv_gpu_hw_init(gpu);
gpu->exec_state = -1;
mutex_unlock(&gpu->lock);
pm_runtime_mark_last_busy(gpu->dev);
pm_runtime_put_autosuspend(gpu->dev);
return 0;
free_buffer:
etnaviv_gpu_cmdbuf_free(gpu->buffer);
gpu->buffer = NULL;
destroy_iommu:
etnaviv_iommu_destroy(gpu->mmu);
gpu->mmu = NULL;
fail:
pm_runtime_mark_last_busy(gpu->dev);
pm_runtime_put_autosuspend(gpu->dev);
return ret;
}
#ifdef CONFIG_DEBUG_FS
struct dma_debug {
u32 address[2];
u32 state[2];
};
static void verify_dma(struct etnaviv_gpu *gpu, struct dma_debug *debug)
{
u32 i;
debug->address[0] = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
debug->state[0] = gpu_read(gpu, VIVS_FE_DMA_DEBUG_STATE);
for (i = 0; i < 500; i++) {
debug->address[1] = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
debug->state[1] = gpu_read(gpu, VIVS_FE_DMA_DEBUG_STATE);
if (debug->address[0] != debug->address[1])
break;
if (debug->state[0] != debug->state[1])
break;
}
}
int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
{
struct dma_debug debug;
u32 dma_lo, dma_hi, axi, idle;
int ret;
seq_printf(m, "%s Status:\n", dev_name(gpu->dev));
ret = pm_runtime_get_sync(gpu->dev);
if (ret < 0)
return ret;
dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW);
dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH);
axi = gpu_read(gpu, VIVS_HI_AXI_STATUS);
idle = gpu_read(gpu, VIVS_HI_IDLE_STATE);
verify_dma(gpu, &debug);
seq_puts(m, "\tfeatures\n");
seq_printf(m, "\t minor_features0: 0x%08x\n",
gpu->identity.minor_features0);
seq_printf(m, "\t minor_features1: 0x%08x\n",
gpu->identity.minor_features1);
seq_printf(m, "\t minor_features2: 0x%08x\n",
gpu->identity.minor_features2);
seq_printf(m, "\t minor_features3: 0x%08x\n",
gpu->identity.minor_features3);
seq_printf(m, "\t minor_features4: 0x%08x\n",
gpu->identity.minor_features4);
seq_printf(m, "\t minor_features5: 0x%08x\n",
gpu->identity.minor_features5);
seq_puts(m, "\tspecs\n");
seq_printf(m, "\t stream_count: %d\n",
gpu->identity.stream_count);
seq_printf(m, "\t register_max: %d\n",
gpu->identity.register_max);
seq_printf(m, "\t thread_count: %d\n",
gpu->identity.thread_count);
seq_printf(m, "\t vertex_cache_size: %d\n",
gpu->identity.vertex_cache_size);
seq_printf(m, "\t shader_core_count: %d\n",
gpu->identity.shader_core_count);
seq_printf(m, "\t pixel_pipes: %d\n",
gpu->identity.pixel_pipes);
seq_printf(m, "\t vertex_output_buffer_size: %d\n",
gpu->identity.vertex_output_buffer_size);
seq_printf(m, "\t buffer_size: %d\n",
gpu->identity.buffer_size);
seq_printf(m, "\t instruction_count: %d\n",
gpu->identity.instruction_count);
seq_printf(m, "\t num_constants: %d\n",
gpu->identity.num_constants);
seq_printf(m, "\t varyings_count: %d\n",
gpu->identity.varyings_count);
seq_printf(m, "\taxi: 0x%08x\n", axi);
seq_printf(m, "\tidle: 0x%08x\n", idle);
idle |= ~gpu->idle_mask & ~VIVS_HI_IDLE_STATE_AXI_LP;
if ((idle & VIVS_HI_IDLE_STATE_FE) == 0)
seq_puts(m, "\t FE is not idle\n");
if ((idle & VIVS_HI_IDLE_STATE_DE) == 0)
seq_puts(m, "\t DE is not idle\n");
if ((idle & VIVS_HI_IDLE_STATE_PE) == 0)
seq_puts(m, "\t PE is not idle\n");
if ((idle & VIVS_HI_IDLE_STATE_SH) == 0)
seq_puts(m, "\t SH is not idle\n");
if ((idle & VIVS_HI_IDLE_STATE_PA) == 0)
seq_puts(m, "\t PA is not idle\n");
if ((idle & VIVS_HI_IDLE_STATE_SE) == 0)
seq_puts(m, "\t SE is not idle\n");
if ((idle & VIVS_HI_IDLE_STATE_RA) == 0)
seq_puts(m, "\t RA is not idle\n");
if ((idle & VIVS_HI_IDLE_STATE_TX) == 0)
seq_puts(m, "\t TX is not idle\n");
if ((idle & VIVS_HI_IDLE_STATE_VG) == 0)
seq_puts(m, "\t VG is not idle\n");
if ((idle & VIVS_HI_IDLE_STATE_IM) == 0)
seq_puts(m, "\t IM is not idle\n");
if ((idle & VIVS_HI_IDLE_STATE_FP) == 0)
seq_puts(m, "\t FP is not idle\n");
if ((idle & VIVS_HI_IDLE_STATE_TS) == 0)
seq_puts(m, "\t TS is not idle\n");
if (idle & VIVS_HI_IDLE_STATE_AXI_LP)
seq_puts(m, "\t AXI low power mode\n");
if (gpu->identity.features & chipFeatures_DEBUG_MODE) {
u32 read0 = gpu_read(gpu, VIVS_MC_DEBUG_READ0);
u32 read1 = gpu_read(gpu, VIVS_MC_DEBUG_READ1);
u32 write = gpu_read(gpu, VIVS_MC_DEBUG_WRITE);
seq_puts(m, "\tMC\n");
seq_printf(m, "\t read0: 0x%08x\n", read0);
seq_printf(m, "\t read1: 0x%08x\n", read1);
seq_printf(m, "\t write: 0x%08x\n", write);
}
seq_puts(m, "\tDMA ");
if (debug.address[0] == debug.address[1] &&
debug.state[0] == debug.state[1]) {
seq_puts(m, "seems to be stuck\n");
} else if (debug.address[0] == debug.address[1]) {
seq_puts(m, "address is constant\n");
} else {
seq_puts(m, "is running\n");
}
seq_printf(m, "\t address 0: 0x%08x\n", debug.address[0]);
seq_printf(m, "\t address 1: 0x%08x\n", debug.address[1]);
seq_printf(m, "\t state 0: 0x%08x\n", debug.state[0]);
seq_printf(m, "\t state 1: 0x%08x\n", debug.state[1]);
seq_printf(m, "\t last fetch 64 bit word: 0x%08x 0x%08x\n",
dma_lo, dma_hi);
ret = 0;
pm_runtime_mark_last_busy(gpu->dev);
pm_runtime_put_autosuspend(gpu->dev);
return ret;
}
#endif
/*
* Power Management:
*/
static int enable_clk(struct etnaviv_gpu *gpu)
{
int ret;
if (gpu->clk_core) {
ret = clk_prepare_enable(gpu->clk_core);
if (ret)
return ret;
}
if (gpu->clk_shader) {
ret = clk_prepare_enable(gpu->clk_shader);
if (ret)
goto disable_clk_core;
}
return 0;
disable_clk_core:
clk_disable_unprepare(gpu->clk_core);
return ret;
}
static int disable_clk(struct etnaviv_gpu *gpu)
{
if (gpu->clk_core)
clk_disable_unprepare(gpu->clk_core);
if (gpu->clk_shader)
clk_disable_unprepare(gpu->clk_shader);
return 0;
}
static int enable_axi(struct etnaviv_gpu *gpu)
{
int ret;
if (gpu->clk_bus) {
ret = clk_prepare_enable(gpu->clk_bus);
if (ret)
return ret;
}
return 0;
}
static int disable_axi(struct etnaviv_gpu *gpu)
{
if (gpu->clk_bus)
clk_disable_unprepare(gpu->clk_bus);
return 0;
}
/*
* Hangcheck detection for locked gpu:
*/
static void recover_worker(struct work_struct *work)
{
struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
recover_work);
unsigned long flags;
unsigned int i;
dev_err(gpu->dev, "hangcheck recover!\n");
if (pm_runtime_get_sync(gpu->dev) < 0)
return;
mutex_lock(&gpu->lock);
/* Only catch the first event, or when manually re-armed */
if (etnaviv_dump_core) {
etnaviv_core_dump(gpu);
etnaviv_dump_core = false;
}
etnaviv_hw_reset(gpu);
/* complete all events, the GPU won't do it after the reset */
spin_lock_irqsave(&gpu->event_spinlock, flags);
for (i = 0; i < ARRAY_SIZE(gpu->event); i++) {
if (!gpu->event[i].used)
continue;
fence_signal(gpu->event[i].fence);
gpu->event[i].fence = NULL;
gpu->event[i].used = false;
complete(&gpu->event_free);
}
spin_unlock_irqrestore(&gpu->event_spinlock, flags);
gpu->completed_fence = gpu->active_fence;
etnaviv_gpu_hw_init(gpu);
gpu->switch_context = true;
gpu->exec_state = -1;
mutex_unlock(&gpu->lock);
pm_runtime_mark_last_busy(gpu->dev);
pm_runtime_put_autosuspend(gpu->dev);
/* Retire the buffer objects in a work */
etnaviv_queue_work(gpu->drm, &gpu->retire_work);
}
static void hangcheck_timer_reset(struct etnaviv_gpu *gpu)
{
DBG("%s", dev_name(gpu->dev));
mod_timer(&gpu->hangcheck_timer,
round_jiffies_up(jiffies + DRM_ETNAVIV_HANGCHECK_JIFFIES));
}
static void hangcheck_handler(unsigned long data)
{
struct etnaviv_gpu *gpu = (struct etnaviv_gpu *)data;
u32 fence = gpu->completed_fence;
bool progress = false;
if (fence != gpu->hangcheck_fence) {
gpu->hangcheck_fence = fence;
progress = true;
}
if (!progress) {
u32 dma_addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
int change = dma_addr - gpu->hangcheck_dma_addr;
if (change < 0 || change > 16) {
gpu->hangcheck_dma_addr = dma_addr;
progress = true;
}
}
if (!progress && fence_after(gpu->active_fence, fence)) {
dev_err(gpu->dev, "hangcheck detected gpu lockup!\n");
dev_err(gpu->dev, " completed fence: %u\n", fence);
dev_err(gpu->dev, " active fence: %u\n",
gpu->active_fence);
etnaviv_queue_work(gpu->drm, &gpu->recover_work);
}
/* if still more pending work, reset the hangcheck timer: */
if (fence_after(gpu->active_fence, gpu->hangcheck_fence))
hangcheck_timer_reset(gpu);
}
static void hangcheck_disable(struct etnaviv_gpu *gpu)
{
del_timer_sync(&gpu->hangcheck_timer);
cancel_work_sync(&gpu->recover_work);
}
/* fence object management */
struct etnaviv_fence {
struct etnaviv_gpu *gpu;
struct fence base;
};
static inline struct etnaviv_fence *to_etnaviv_fence(struct fence *fence)
{
return container_of(fence, struct etnaviv_fence, base);
}
static const char *etnaviv_fence_get_driver_name(struct fence *fence)
{
return "etnaviv";
}
static const char *etnaviv_fence_get_timeline_name(struct fence *fence)
{
struct etnaviv_fence *f = to_etnaviv_fence(fence);
return dev_name(f->gpu->dev);
}
static bool etnaviv_fence_enable_signaling(struct fence *fence)
{
return true;
}
static bool etnaviv_fence_signaled(struct fence *fence)
{
struct etnaviv_fence *f = to_etnaviv_fence(fence);
return fence_completed(f->gpu, f->base.seqno);
}
static void etnaviv_fence_release(struct fence *fence)
{
struct etnaviv_fence *f = to_etnaviv_fence(fence);
kfree_rcu(f, base.rcu);
}
static const struct fence_ops etnaviv_fence_ops = {
.get_driver_name = etnaviv_fence_get_driver_name,
.get_timeline_name = etnaviv_fence_get_timeline_name,
.enable_signaling = etnaviv_fence_enable_signaling,
.signaled = etnaviv_fence_signaled,
.wait = fence_default_wait,
.release = etnaviv_fence_release,
};
static struct fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu)
{
struct etnaviv_fence *f;
f = kzalloc(sizeof(*f), GFP_KERNEL);
if (!f)
return NULL;
f->gpu = gpu;
fence_init(&f->base, &etnaviv_fence_ops, &gpu->fence_spinlock,
gpu->fence_context, ++gpu->next_fence);
return &f->base;
}
int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj,
unsigned int context, bool exclusive)
{
struct reservation_object *robj = etnaviv_obj->resv;
struct reservation_object_list *fobj;
struct fence *fence;
int i, ret;
if (!exclusive) {
ret = reservation_object_reserve_shared(robj);
if (ret)
return ret;
}
/*
* If we have any shared fences, then the exclusive fence
* should be ignored as it will already have been signalled.
*/
fobj = reservation_object_get_list(robj);
if (!fobj || fobj->shared_count == 0) {
/* Wait on any existing exclusive fence which isn't our own */
fence = reservation_object_get_excl(robj);
if (fence && fence->context != context) {
ret = fence_wait(fence, true);
if (ret)
return ret;
}
}
if (!exclusive || !fobj)
return 0;
for (i = 0; i < fobj->shared_count; i++) {
fence = rcu_dereference_protected(fobj->shared[i],
reservation_object_held(robj));
if (fence->context != context) {
ret = fence_wait(fence, true);
if (ret)
return ret;
}
}
return 0;
}
/*
* event management:
*/
static unsigned int event_alloc(struct etnaviv_gpu *gpu)
{
unsigned long ret, flags;
unsigned int i, event = ~0U;
ret = wait_for_completion_timeout(&gpu->event_free,
msecs_to_jiffies(10 * 10000));
if (!ret)
dev_err(gpu->dev, "wait_for_completion_timeout failed");
spin_lock_irqsave(&gpu->event_spinlock, flags);
/* find first free event */
for (i = 0; i < ARRAY_SIZE(gpu->event); i++) {
if (gpu->event[i].used == false) {
gpu->event[i].used = true;
event = i;
break;
}
}
spin_unlock_irqrestore(&gpu->event_spinlock, flags);
return event;
}
static void event_free(struct etnaviv_gpu *gpu, unsigned int event)
{
unsigned long flags;
spin_lock_irqsave(&gpu->event_spinlock, flags);
if (gpu->event[event].used == false) {
dev_warn(gpu->dev, "event %u is already marked as free",
event);
spin_unlock_irqrestore(&gpu->event_spinlock, flags);
} else {
gpu->event[event].used = false;
spin_unlock_irqrestore(&gpu->event_spinlock, flags);
complete(&gpu->event_free);
}
}
/*
* Cmdstream submission/retirement:
*/
struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size,
size_t nr_bos)
{
struct etnaviv_cmdbuf *cmdbuf;
size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]),
sizeof(*cmdbuf));
cmdbuf = kzalloc(sz, GFP_KERNEL);
if (!cmdbuf)
return NULL;
dma, mm/pat: Rename dma_*_writecombine() to dma_*_wc() Rename dma_*_writecombine() to dma_*_wc(), so that the naming is coherent across the various write-combining APIs. Keep the old names for compatibility for a while, these can be removed at a later time. A guard is left to enable backporting of the rename, and later remove of the old mapping defines seemlessly. Build tested successfully with allmodconfig. The following Coccinelle SmPL patch was used for this simple transformation: @ rename_dma_alloc_writecombine @ expression dev, size, dma_addr, gfp; @@ -dma_alloc_writecombine(dev, size, dma_addr, gfp) +dma_alloc_wc(dev, size, dma_addr, gfp) @ rename_dma_free_writecombine @ expression dev, size, cpu_addr, dma_addr; @@ -dma_free_writecombine(dev, size, cpu_addr, dma_addr) +dma_free_wc(dev, size, cpu_addr, dma_addr) @ rename_dma_mmap_writecombine @ expression dev, vma, cpu_addr, dma_addr, size; @@ -dma_mmap_writecombine(dev, vma, cpu_addr, dma_addr, size) +dma_mmap_wc(dev, vma, cpu_addr, dma_addr, size) We also keep the old names as compatibility helpers, and guard against their definition to make backporting easier. Generated-by: Coccinelle SmPL Suggested-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bhelgaas@google.com Cc: bp@suse.de Cc: dan.j.williams@intel.com Cc: daniel.vetter@ffwll.ch Cc: dhowells@redhat.com Cc: julia.lawall@lip6.fr Cc: konrad.wilk@oracle.com Cc: linux-fbdev@vger.kernel.org Cc: linux-pci@vger.kernel.org Cc: luto@amacapital.net Cc: mst@redhat.com Cc: tomi.valkeinen@ti.com Cc: toshi.kani@hp.com Cc: vinod.koul@intel.com Cc: xen-devel@lists.xensource.com Link: http://lkml.kernel.org/r/1453516462-4844-1-git-send-email-mcgrof@do-not-panic.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-01-23 10:34:22 +08:00
cmdbuf->vaddr = dma_alloc_wc(gpu->dev, size, &cmdbuf->paddr,
GFP_KERNEL);
if (!cmdbuf->vaddr) {
kfree(cmdbuf);
return NULL;
}
cmdbuf->gpu = gpu;
cmdbuf->size = size;
return cmdbuf;
}
void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
{
dma, mm/pat: Rename dma_*_writecombine() to dma_*_wc() Rename dma_*_writecombine() to dma_*_wc(), so that the naming is coherent across the various write-combining APIs. Keep the old names for compatibility for a while, these can be removed at a later time. A guard is left to enable backporting of the rename, and later remove of the old mapping defines seemlessly. Build tested successfully with allmodconfig. The following Coccinelle SmPL patch was used for this simple transformation: @ rename_dma_alloc_writecombine @ expression dev, size, dma_addr, gfp; @@ -dma_alloc_writecombine(dev, size, dma_addr, gfp) +dma_alloc_wc(dev, size, dma_addr, gfp) @ rename_dma_free_writecombine @ expression dev, size, cpu_addr, dma_addr; @@ -dma_free_writecombine(dev, size, cpu_addr, dma_addr) +dma_free_wc(dev, size, cpu_addr, dma_addr) @ rename_dma_mmap_writecombine @ expression dev, vma, cpu_addr, dma_addr, size; @@ -dma_mmap_writecombine(dev, vma, cpu_addr, dma_addr, size) +dma_mmap_wc(dev, vma, cpu_addr, dma_addr, size) We also keep the old names as compatibility helpers, and guard against their definition to make backporting easier. Generated-by: Coccinelle SmPL Suggested-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bhelgaas@google.com Cc: bp@suse.de Cc: dan.j.williams@intel.com Cc: daniel.vetter@ffwll.ch Cc: dhowells@redhat.com Cc: julia.lawall@lip6.fr Cc: konrad.wilk@oracle.com Cc: linux-fbdev@vger.kernel.org Cc: linux-pci@vger.kernel.org Cc: luto@amacapital.net Cc: mst@redhat.com Cc: tomi.valkeinen@ti.com Cc: toshi.kani@hp.com Cc: vinod.koul@intel.com Cc: xen-devel@lists.xensource.com Link: http://lkml.kernel.org/r/1453516462-4844-1-git-send-email-mcgrof@do-not-panic.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-01-23 10:34:22 +08:00
dma_free_wc(cmdbuf->gpu->dev, cmdbuf->size, cmdbuf->vaddr,
cmdbuf->paddr);
kfree(cmdbuf);
}
static void retire_worker(struct work_struct *work)
{
struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
retire_work);
u32 fence = gpu->completed_fence;
struct etnaviv_cmdbuf *cmdbuf, *tmp;
unsigned int i;
mutex_lock(&gpu->lock);
list_for_each_entry_safe(cmdbuf, tmp, &gpu->active_cmd_list, node) {
if (!fence_is_signaled(cmdbuf->fence))
break;
list_del(&cmdbuf->node);
fence_put(cmdbuf->fence);
for (i = 0; i < cmdbuf->nr_bos; i++) {
struct etnaviv_vram_mapping *mapping = cmdbuf->bo_map[i];
struct etnaviv_gem_object *etnaviv_obj = mapping->object;
atomic_dec(&etnaviv_obj->gpu_active);
/* drop the refcount taken in etnaviv_gpu_submit */
etnaviv_gem_mapping_unreference(mapping);
}
etnaviv_gpu_cmdbuf_free(cmdbuf);
/*
* We need to balance the runtime PM count caused by
* each submission. Upon submission, we increment
* the runtime PM counter, and allocate one event.
* So here, we put the runtime PM count for each
* completed event.
*/
pm_runtime_put_autosuspend(gpu->dev);
}
gpu->retired_fence = fence;
mutex_unlock(&gpu->lock);
wake_up_all(&gpu->fence_event);
}
int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
u32 fence, struct timespec *timeout)
{
int ret;
if (fence_after(fence, gpu->next_fence)) {
DRM_ERROR("waiting on invalid fence: %u (of %u)\n",
fence, gpu->next_fence);
return -EINVAL;
}
if (!timeout) {
/* No timeout was requested: just test for completion */
ret = fence_completed(gpu, fence) ? 0 : -EBUSY;
} else {
unsigned long remaining = etnaviv_timeout_to_jiffies(timeout);
ret = wait_event_interruptible_timeout(gpu->fence_event,
fence_completed(gpu, fence),
remaining);
if (ret == 0) {
DBG("timeout waiting for fence: %u (retired: %u completed: %u)",
fence, gpu->retired_fence,
gpu->completed_fence);
ret = -ETIMEDOUT;
} else if (ret != -ERESTARTSYS) {
ret = 0;
}
}
return ret;
}
/*
* Wait for an object to become inactive. This, on it's own, is not race
* free: the object is moved by the retire worker off the active list, and
* then the iova is put. Moreover, the object could be re-submitted just
* after we notice that it's become inactive.
*
* Although the retirement happens under the gpu lock, we don't want to hold
* that lock in this function while waiting.
*/
int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout)
{
unsigned long remaining;
long ret;
if (!timeout)
return !is_active(etnaviv_obj) ? 0 : -EBUSY;
remaining = etnaviv_timeout_to_jiffies(timeout);
ret = wait_event_interruptible_timeout(gpu->fence_event,
!is_active(etnaviv_obj),
remaining);
if (ret > 0) {
struct etnaviv_drm_private *priv = gpu->drm->dev_private;
/* Synchronise with the retire worker */
flush_workqueue(priv->wq);
return 0;
} else if (ret == -ERESTARTSYS) {
return -ERESTARTSYS;
} else {
return -ETIMEDOUT;
}
}
int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu)
{
return pm_runtime_get_sync(gpu->dev);
}
void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu)
{
pm_runtime_mark_last_busy(gpu->dev);
pm_runtime_put_autosuspend(gpu->dev);
}
/* add bo's to gpu's ring, and kick gpu: */
int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf)
{
struct fence *fence;
unsigned int event, i;
int ret;
ret = etnaviv_gpu_pm_get_sync(gpu);
if (ret < 0)
return ret;
/*
* TODO
*
* - flush
* - data endian
* - prefetch
*
*/
event = event_alloc(gpu);
if (unlikely(event == ~0U)) {
DRM_ERROR("no free event\n");
ret = -EBUSY;
goto out_pm_put;
}
fence = etnaviv_gpu_fence_alloc(gpu);
if (!fence) {
event_free(gpu, event);
ret = -ENOMEM;
goto out_pm_put;
}
mutex_lock(&gpu->lock);
gpu->event[event].fence = fence;
submit->fence = fence->seqno;
gpu->active_fence = submit->fence;
if (gpu->lastctx != cmdbuf->ctx) {
gpu->mmu->need_flush = true;
gpu->switch_context = true;
gpu->lastctx = cmdbuf->ctx;
}
etnaviv_buffer_queue(gpu, event, cmdbuf);
cmdbuf->fence = fence;
list_add_tail(&cmdbuf->node, &gpu->active_cmd_list);
/* We're committed to adding this command buffer, hold a PM reference */
pm_runtime_get_noresume(gpu->dev);
for (i = 0; i < submit->nr_bos; i++) {
struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
/* Each cmdbuf takes a refcount on the mapping */
etnaviv_gem_mapping_reference(submit->bos[i].mapping);
cmdbuf->bo_map[i] = submit->bos[i].mapping;
atomic_inc(&etnaviv_obj->gpu_active);
if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE)
reservation_object_add_excl_fence(etnaviv_obj->resv,
fence);
else
reservation_object_add_shared_fence(etnaviv_obj->resv,
fence);
}
cmdbuf->nr_bos = submit->nr_bos;
hangcheck_timer_reset(gpu);
ret = 0;
mutex_unlock(&gpu->lock);
out_pm_put:
etnaviv_gpu_pm_put(gpu);
return ret;
}
/*
* Init/Cleanup:
*/
static irqreturn_t irq_handler(int irq, void *data)
{
struct etnaviv_gpu *gpu = data;
irqreturn_t ret = IRQ_NONE;
u32 intr = gpu_read(gpu, VIVS_HI_INTR_ACKNOWLEDGE);
if (intr != 0) {
int event;
pm_runtime_mark_last_busy(gpu->dev);
dev_dbg(gpu->dev, "intr 0x%08x\n", intr);
if (intr & VIVS_HI_INTR_ACKNOWLEDGE_AXI_BUS_ERROR) {
dev_err(gpu->dev, "AXI bus error\n");
intr &= ~VIVS_HI_INTR_ACKNOWLEDGE_AXI_BUS_ERROR;
}
while ((event = ffs(intr)) != 0) {
struct fence *fence;
event -= 1;
intr &= ~(1 << event);
dev_dbg(gpu->dev, "event %u\n", event);
fence = gpu->event[event].fence;
gpu->event[event].fence = NULL;
fence_signal(fence);
/*
* Events can be processed out of order. Eg,
* - allocate and queue event 0
* - allocate event 1
* - event 0 completes, we process it
* - allocate and queue event 0
* - event 1 and event 0 complete
* we can end up processing event 0 first, then 1.
*/
if (fence_after(fence->seqno, gpu->completed_fence))
gpu->completed_fence = fence->seqno;
event_free(gpu, event);
}
/* Retire the buffer objects in a work */
etnaviv_queue_work(gpu->drm, &gpu->retire_work);
ret = IRQ_HANDLED;
}
return ret;
}
static int etnaviv_gpu_clk_enable(struct etnaviv_gpu *gpu)
{
int ret;
ret = enable_clk(gpu);
if (ret)
return ret;
ret = enable_axi(gpu);
if (ret) {
disable_clk(gpu);
return ret;
}
return 0;
}
static int etnaviv_gpu_clk_disable(struct etnaviv_gpu *gpu)
{
int ret;
ret = disable_axi(gpu);
if (ret)
return ret;
ret = disable_clk(gpu);
if (ret)
return ret;
return 0;
}
static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
{
if (gpu->buffer) {
unsigned long timeout;
/* Replace the last WAIT with END */
etnaviv_buffer_end(gpu);
/*
* We know that only the FE is busy here, this should
* happen quickly (as the WAIT is only 200 cycles). If
* we fail, just warn and continue.
*/
timeout = jiffies + msecs_to_jiffies(100);
do {
u32 idle = gpu_read(gpu, VIVS_HI_IDLE_STATE);
if ((idle & gpu->idle_mask) == gpu->idle_mask)
break;
if (time_is_before_jiffies(timeout)) {
dev_warn(gpu->dev,
"timed out waiting for idle: idle=0x%x\n",
idle);
break;
}
udelay(5);
} while (1);
}
return etnaviv_gpu_clk_disable(gpu);
}
#ifdef CONFIG_PM
static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
{
u32 clock;
int ret;
ret = mutex_lock_killable(&gpu->lock);
if (ret)
return ret;
clock = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS |
VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(0x40);
etnaviv_gpu_load_clock(gpu, clock);
etnaviv_gpu_hw_init(gpu);
gpu->switch_context = true;
gpu->exec_state = -1;
mutex_unlock(&gpu->lock);
return 0;
}
#endif
static int etnaviv_gpu_bind(struct device *dev, struct device *master,
void *data)
{
struct drm_device *drm = data;
struct etnaviv_drm_private *priv = drm->dev_private;
struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
int ret;
#ifdef CONFIG_PM
ret = pm_runtime_get_sync(gpu->dev);
#else
ret = etnaviv_gpu_clk_enable(gpu);
#endif
if (ret < 0)
return ret;
gpu->drm = drm;
gpu->fence_context = fence_context_alloc(1);
spin_lock_init(&gpu->fence_spinlock);
INIT_LIST_HEAD(&gpu->active_cmd_list);
INIT_WORK(&gpu->retire_work, retire_worker);
INIT_WORK(&gpu->recover_work, recover_worker);
init_waitqueue_head(&gpu->fence_event);
setup_deferrable_timer(&gpu->hangcheck_timer, hangcheck_handler,
(unsigned long)gpu);
priv->gpu[priv->num_gpus++] = gpu;
pm_runtime_mark_last_busy(gpu->dev);
pm_runtime_put_autosuspend(gpu->dev);
return 0;
}
static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
void *data)
{
struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
DBG("%s", dev_name(gpu->dev));
hangcheck_disable(gpu);
#ifdef CONFIG_PM
pm_runtime_get_sync(gpu->dev);
pm_runtime_put_sync_suspend(gpu->dev);
#else
etnaviv_gpu_hw_suspend(gpu);
#endif
if (gpu->buffer) {
etnaviv_gpu_cmdbuf_free(gpu->buffer);
gpu->buffer = NULL;
}
if (gpu->mmu) {
etnaviv_iommu_destroy(gpu->mmu);
gpu->mmu = NULL;
}
gpu->drm = NULL;
}
static const struct component_ops gpu_ops = {
.bind = etnaviv_gpu_bind,
.unbind = etnaviv_gpu_unbind,
};
static const struct of_device_id etnaviv_gpu_match[] = {
{
.compatible = "vivante,gc"
},
{ /* sentinel */ }
};
static int etnaviv_gpu_platform_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct etnaviv_gpu *gpu;
int err;
gpu = devm_kzalloc(dev, sizeof(*gpu), GFP_KERNEL);
if (!gpu)
return -ENOMEM;
gpu->dev = &pdev->dev;
mutex_init(&gpu->lock);
/* Map registers: */
gpu->mmio = etnaviv_ioremap(pdev, NULL, dev_name(gpu->dev));
if (IS_ERR(gpu->mmio))
return PTR_ERR(gpu->mmio);
/* Get Interrupt: */
gpu->irq = platform_get_irq(pdev, 0);
if (gpu->irq < 0) {
dev_err(dev, "failed to get irq: %d\n", gpu->irq);
return gpu->irq;
}
err = devm_request_irq(&pdev->dev, gpu->irq, irq_handler, 0,
dev_name(gpu->dev), gpu);
if (err) {
dev_err(dev, "failed to request IRQ%u: %d\n", gpu->irq, err);
return err;
}
/* Get Clocks: */
gpu->clk_bus = devm_clk_get(&pdev->dev, "bus");
DBG("clk_bus: %p", gpu->clk_bus);
if (IS_ERR(gpu->clk_bus))
gpu->clk_bus = NULL;
gpu->clk_core = devm_clk_get(&pdev->dev, "core");
DBG("clk_core: %p", gpu->clk_core);
if (IS_ERR(gpu->clk_core))
gpu->clk_core = NULL;
gpu->clk_shader = devm_clk_get(&pdev->dev, "shader");
DBG("clk_shader: %p", gpu->clk_shader);
if (IS_ERR(gpu->clk_shader))
gpu->clk_shader = NULL;
/* TODO: figure out max mapped size */
dev_set_drvdata(dev, gpu);
/*
* We treat the device as initially suspended. The runtime PM
* autosuspend delay is rather arbitary: no measurements have
* yet been performed to determine an appropriate value.
*/
pm_runtime_use_autosuspend(gpu->dev);
pm_runtime_set_autosuspend_delay(gpu->dev, 200);
pm_runtime_enable(gpu->dev);
err = component_add(&pdev->dev, &gpu_ops);
if (err < 0) {
dev_err(&pdev->dev, "failed to register component: %d\n", err);
return err;
}
return 0;
}
static int etnaviv_gpu_platform_remove(struct platform_device *pdev)
{
component_del(&pdev->dev, &gpu_ops);
pm_runtime_disable(&pdev->dev);
return 0;
}
#ifdef CONFIG_PM
static int etnaviv_gpu_rpm_suspend(struct device *dev)
{
struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
u32 idle, mask;
/* If we have outstanding fences, we're not idle */
if (gpu->completed_fence != gpu->active_fence)
return -EBUSY;
/* Check whether the hardware (except FE) is idle */
mask = gpu->idle_mask & ~VIVS_HI_IDLE_STATE_FE;
idle = gpu_read(gpu, VIVS_HI_IDLE_STATE) & mask;
if (idle != mask)
return -EBUSY;
return etnaviv_gpu_hw_suspend(gpu);
}
static int etnaviv_gpu_rpm_resume(struct device *dev)
{
struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
int ret;
ret = etnaviv_gpu_clk_enable(gpu);
if (ret)
return ret;
/* Re-initialise the basic hardware state */
if (gpu->drm && gpu->buffer) {
ret = etnaviv_gpu_hw_resume(gpu);
if (ret) {
etnaviv_gpu_clk_disable(gpu);
return ret;
}
}
return 0;
}
#endif
static const struct dev_pm_ops etnaviv_gpu_pm_ops = {
SET_RUNTIME_PM_OPS(etnaviv_gpu_rpm_suspend, etnaviv_gpu_rpm_resume,
NULL)
};
struct platform_driver etnaviv_gpu_driver = {
.driver = {
.name = "etnaviv-gpu",
.owner = THIS_MODULE,
.pm = &etnaviv_gpu_pm_ops,
.of_match_table = etnaviv_gpu_match,
},
.probe = etnaviv_gpu_platform_probe,
.remove = etnaviv_gpu_platform_remove,
.id_table = gpu_ids,
};