2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Copyright 2008 Advanced Micro Devices, Inc.
|
|
|
|
* Copyright 2008 Red Hat Inc.
|
|
|
|
* Copyright 2009 Jerome Glisse.
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
*
|
|
|
|
* Authors: Dave Airlie
|
|
|
|
* Alex Deucher
|
|
|
|
* Jerome Glisse
|
|
|
|
*/
|
|
|
|
#ifndef __RADEON_H__
|
|
|
|
#define __RADEON_H__
|
|
|
|
|
|
|
|
/* TODO: Here are the things that need to be done:
|
|
|
|
* - surface allocator & initializer : (bit like scratch reg) should
|
|
|
|
* initialize HDP_ stuff on RS600, R600, R700 hw; well, anything
|
|
|
|
* related to surface
|
|
|
|
* - WB : write back stuff (do it bit like scratch reg things)
|
|
|
|
* - Vblank : look at Jesse's rework and what we should do
|
|
|
|
* - r600/r700: gart & cp
|
|
|
|
* - cs : clean cs ioctl use bitmap & things like that.
|
|
|
|
* - power management stuff
|
|
|
|
* - Barrier in gart code
|
|
|
|
* - Unmappable vram?
|
|
|
|
* - TESTING, TESTING, TESTING
|
|
|
|
*/
|
|
|
|
|
2009-09-29 00:34:43 +08:00
|
|
|
/* Initialization path:
|
|
|
|
* We expect that acceleration initialization might fail for various
|
|
|
|
* reasons even though we work hard to make it work on most
|
|
|
|
* configurations. In order to still have a working userspace in such
|
|
|
|
* situation the init path must succeed up to the memory controller
|
|
|
|
* initialization point. Failures before this point are considered
* fatal errors. Here is the init callchain:
|
|
|
|
* radeon_device_init perform common structure, mutex initialization
|
|
|
|
* asic_init setup the GPU memory layout and perform all
|
|
|
|
* one time initialization (failures in this
* function are considered fatal)
|
|
|
|
* asic_startup setup the GPU acceleration, in order to
|
|
|
|
* follow guideline the first thing this
|
|
|
|
* function should do is setting the GPU
|
|
|
|
* memory controller (only MC setup failures
* are considered fatal)
|
|
|
|
*/
|
|
|
|
|
2011-07-27 07:09:06 +08:00
|
|
|
#include <linux/atomic.h>
|
2009-06-05 20:42:42 +08:00
|
|
|
#include <linux/wait.h>
|
|
|
|
#include <linux/list.h>
|
|
|
|
#include <linux/kref.h>
|
2014-07-30 23:49:56 +08:00
|
|
|
#include <linux/interval_tree.h>
|
2014-08-07 15:36:03 +08:00
|
|
|
#include <linux/hashtable.h>
|
2016-10-25 20:00:45 +08:00
|
|
|
#include <linux/dma-fence.h>
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
#include <ttm/ttm_bo_api.h>
|
|
|
|
#include <ttm/ttm_bo_driver.h>
|
|
|
|
#include <ttm/ttm_placement.h>
|
|
|
|
#include <ttm/ttm_module.h>
|
2010-11-17 20:38:32 +08:00
|
|
|
#include <ttm/ttm_execbuf_util.h>
|
2009-11-20 21:29:23 +08:00
|
|
|
|
2014-09-23 21:46:53 +08:00
|
|
|
#include <drm/drm_gem.h>
|
|
|
|
|
2009-09-22 06:50:10 +08:00
|
|
|
#include "radeon_family.h"
|
2009-06-05 20:42:42 +08:00
|
|
|
#include "radeon_mode.h"
|
|
|
|
#include "radeon_reg.h"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Module parameters.
|
|
|
|
*/
|
|
|
|
extern int radeon_no_wb;
|
|
|
|
extern int radeon_modeset;
|
|
|
|
extern int radeon_dynclks;
|
|
|
|
extern int radeon_r4xx_atom;
|
|
|
|
extern int radeon_agpmode;
|
|
|
|
extern int radeon_vram_limit;
|
|
|
|
extern int radeon_gart_size;
|
|
|
|
extern int radeon_benchmarking;
|
2009-07-21 17:23:57 +08:00
|
|
|
extern int radeon_testing;
|
2009-06-05 20:42:42 +08:00
|
|
|
extern int radeon_connector_table;
|
2009-08-13 14:32:14 +08:00
|
|
|
extern int radeon_tv;
|
2009-10-12 05:49:13 +08:00
|
|
|
extern int radeon_audio;
|
2010-03-31 12:33:27 +08:00
|
|
|
extern int radeon_disp_priority;
|
2010-03-17 14:07:37 +08:00
|
|
|
extern int radeon_hw_i2c;
|
2011-01-13 09:05:11 +08:00
|
|
|
extern int radeon_pcie_gen2;
|
2011-11-02 02:20:30 +08:00
|
|
|
extern int radeon_msi;
|
2012-05-02 21:11:21 +08:00
|
|
|
extern int radeon_lockup_timeout;
|
2013-04-09 05:25:47 +08:00
|
|
|
extern int radeon_fastfb;
|
2013-04-13 01:55:22 +08:00
|
|
|
extern int radeon_dpm;
|
2013-07-17 03:58:50 +08:00
|
|
|
extern int radeon_aspm;
|
2012-09-17 12:40:31 +08:00
|
|
|
extern int radeon_runtime_pm;
|
2014-01-09 06:55:08 +08:00
|
|
|
extern int radeon_hard_reset;
|
2014-06-06 11:47:32 +08:00
|
|
|
extern int radeon_vm_size;
|
2014-06-06 11:56:50 +08:00
|
|
|
extern int radeon_vm_block_size;
|
2014-07-01 23:23:03 +08:00
|
|
|
extern int radeon_deep_color;
|
2014-07-29 12:21:44 +08:00
|
|
|
extern int radeon_use_pflipirq;
|
2014-08-07 21:28:31 +08:00
|
|
|
extern int radeon_bapm;
|
2014-09-17 08:57:26 +08:00
|
|
|
extern int radeon_backlight;
|
2015-02-20 07:21:36 +08:00
|
|
|
extern int radeon_auxch;
|
2015-02-24 07:24:04 +08:00
|
|
|
extern int radeon_mst;
|
2016-03-18 23:58:36 +08:00
|
|
|
extern int radeon_uvd;
|
2016-03-18 23:58:37 +08:00
|
|
|
extern int radeon_vce;
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Copied from radeon_drv.h so we don't have to include both and get
* conflicting symbols.
|
|
|
|
*/
|
2012-05-09 21:34:46 +08:00
|
|
|
#define RADEON_MAX_USEC_TIMEOUT			100000	/* 100 ms */
/* Expressed in jiffies: HZ / 2 is half a second. */
#define RADEON_FENCE_JIFFIES_TIMEOUT		(HZ / 2)
#define RADEON_USEC_IB_TEST_TIMEOUT		1000000 /* 1s */
/* RADEON_IB_POOL_SIZE must be a power of 2 */
#define RADEON_IB_POOL_SIZE			16
#define RADEON_DEBUGFS_MAX_COMPONENTS		32
#define RADEONFB_CONN_LIMIT			4
#define RADEON_BIOS_NUM_SCRATCH			8
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2011-11-18 09:13:28 +08:00
|
|
|
/*
 * Internal ring indices. Every index below is smaller than
 * RADEON_NUM_RINGS, so each can index an array of that size.
 */
/* r1xx+ has gfx CP ring */
#define RADEON_RING_TYPE_GFX_INDEX		0

/* cayman has 2 compute CP rings */
#define CAYMAN_RING_TYPE_CP1_INDEX		1
#define CAYMAN_RING_TYPE_CP2_INDEX		2

/* R600+ has an async dma ring */
#define R600_RING_TYPE_DMA_INDEX		3
/* cayman adds a second async dma ring */
#define CAYMAN_RING_TYPE_DMA1_INDEX		4

/* R600+ */
#define R600_RING_TYPE_UVD_INDEX		5

/* TN+ */
#define TN_RING_TYPE_VCE1_INDEX			6
#define TN_RING_TYPE_VCE2_INDEX			7

/* max number of rings */
#define RADEON_NUM_RINGS			8
|
2013-04-08 18:41:29 +08:00
|
|
|
|
2014-02-18 16:50:22 +08:00
|
|
|
/* number of hw syncs before falling back on blocking */
#define RADEON_NUM_SYNCS			4
|
2013-04-08 18:41:29 +08:00
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* hardcode those limits for now */
#define RADEON_VA_IB_OFFSET			(1 << 20)	/* 1 MiB */
#define RADEON_VA_RESERVED_SIZE			(8 << 20)	/* 8 MiB */
#define RADEON_IB_VM_MAX_SIZE			(64 << 10)	/* 64 KiB */
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
|
2013-10-03 01:01:36 +08:00
|
|
|
/* hard reset data */
#define RADEON_ASIC_RESET_DATA                  0x39d5e86b
|
|
|
|
|
2013-01-04 01:07:30 +08:00
|
|
|
/* reset flags: one distinct bit per block, so they can be OR-ed together */
#define RADEON_RESET_GFX			(1 << 0)
#define RADEON_RESET_COMPUTE			(1 << 1)
#define RADEON_RESET_DMA			(1 << 2)
#define RADEON_RESET_CP				(1 << 3)
#define RADEON_RESET_GRBM			(1 << 4)
#define RADEON_RESET_DMA1			(1 << 5)
#define RADEON_RESET_RLC			(1 << 6)
#define RADEON_RESET_SEM			(1 << 7)
#define RADEON_RESET_IH				(1 << 8)
#define RADEON_RESET_VMC			(1 << 9)
#define RADEON_RESET_MC				(1 << 10)
#define RADEON_RESET_DISPLAY			(1 << 11)
|
2013-01-04 01:07:30 +08:00
|
|
|
|
2013-07-23 21:41:05 +08:00
|
|
|
/* CG block flags: one distinct bit per hardware block */
#define RADEON_CG_BLOCK_GFX			(1 << 0)
#define RADEON_CG_BLOCK_MC			(1 << 1)
#define RADEON_CG_BLOCK_SDMA			(1 << 2)
#define RADEON_CG_BLOCK_UVD			(1 << 3)
#define RADEON_CG_BLOCK_VCE			(1 << 4)
#define RADEON_CG_BLOCK_HDP			(1 << 5)
#define RADEON_CG_BLOCK_BIF			(1 << 6)
|
2013-07-23 21:41:05 +08:00
|
|
|
|
2013-08-09 04:31:25 +08:00
|
|
|
/* CG flags: one distinct bit per supported feature */
#define RADEON_CG_SUPPORT_GFX_MGCG		(1 << 0)
#define RADEON_CG_SUPPORT_GFX_MGLS		(1 << 1)
#define RADEON_CG_SUPPORT_GFX_CGCG		(1 << 2)
#define RADEON_CG_SUPPORT_GFX_CGLS		(1 << 3)
#define RADEON_CG_SUPPORT_GFX_CGTS		(1 << 4)
#define RADEON_CG_SUPPORT_GFX_CGTS_LS		(1 << 5)
#define RADEON_CG_SUPPORT_GFX_CP_LS		(1 << 6)
#define RADEON_CG_SUPPORT_GFX_RLC_LS		(1 << 7)
#define RADEON_CG_SUPPORT_MC_LS			(1 << 8)
#define RADEON_CG_SUPPORT_MC_MGCG		(1 << 9)
#define RADEON_CG_SUPPORT_SDMA_LS		(1 << 10)
#define RADEON_CG_SUPPORT_SDMA_MGCG		(1 << 11)
#define RADEON_CG_SUPPORT_BIF_LS		(1 << 12)
#define RADEON_CG_SUPPORT_UVD_MGCG		(1 << 13)
#define RADEON_CG_SUPPORT_VCE_MGCG		(1 << 14)
#define RADEON_CG_SUPPORT_HDP_LS		(1 << 15)
#define RADEON_CG_SUPPORT_HDP_MGCG		(1 << 16)
|
|
|
|
|
|
|
|
/* PG flags: one distinct bit per supported feature */
#define RADEON_PG_SUPPORT_GFX_PG		(1 << 0)
#define RADEON_PG_SUPPORT_GFX_SMG		(1 << 1)
#define RADEON_PG_SUPPORT_GFX_DMG		(1 << 2)
#define RADEON_PG_SUPPORT_UVD			(1 << 3)
#define RADEON_PG_SUPPORT_VCE			(1 << 4)
#define RADEON_PG_SUPPORT_CP			(1 << 5)
#define RADEON_PG_SUPPORT_GDS			(1 << 6)
#define RADEON_PG_SUPPORT_RLC_SMU_HS		(1 << 7)
#define RADEON_PG_SUPPORT_SDMA			(1 << 8)
#define RADEON_PG_SUPPORT_ACP			(1 << 9)
#define RADEON_PG_SUPPORT_SAMU			(1 << 10)
|
|
|
|
|
2013-01-24 23:06:33 +08:00
|
|
|
/* max cursor sizes (in pixels) */
#define CURSOR_WIDTH				64
#define CURSOR_HEIGHT				64

/* Larger limits; NOTE(review): presumably for CIK-family parts - confirm. */
#define CIK_CURSOR_WIDTH			128
#define CIK_CURSOR_HEIGHT			128
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Errata workarounds.
|
|
|
|
*/
|
|
|
|
/* Each enumerator is a distinct single bit, so values can be OR-ed together. */
enum radeon_pll_errata {
	CHIP_ERRATA_R300_CG		= 0x00000001,
	CHIP_ERRATA_PLL_DUMMYREADS	= 0x00000002,
	CHIP_ERRATA_PLL_DELAY		= 0x00000004
};
|
|
|
|
|
|
|
|
|
|
|
|
struct radeon_device;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* BIOS.
|
|
|
|
*/
|
|
|
|
bool radeon_get_bios(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
/*
|
2009-09-08 08:10:24 +08:00
|
|
|
* Dummy page
|
2009-06-05 20:42:42 +08:00
|
|
|
*/
|
2009-09-08 08:10:24 +08:00
|
|
|
struct radeon_dummy_page {
|
2015-01-21 16:36:35 +08:00
|
|
|
uint64_t entry;
|
2009-09-08 08:10:24 +08:00
|
|
|
struct page *page;
|
|
|
|
dma_addr_t addr;
|
|
|
|
};
|
|
|
|
int radeon_dummy_page_init(struct radeon_device *rdev);
|
|
|
|
void radeon_dummy_page_fini(struct radeon_device *rdev);
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2009-09-08 08:10:24 +08:00
|
|
|
/*
|
|
|
|
* Clocks
|
|
|
|
*/
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_clock {
|
|
|
|
struct radeon_pll p1pll;
|
|
|
|
struct radeon_pll p2pll;
|
2010-01-13 06:54:34 +08:00
|
|
|
struct radeon_pll dcpll;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_pll spll;
|
|
|
|
struct radeon_pll mpll;
|
|
|
|
/* 10 Khz units */
|
|
|
|
uint32_t default_mclk;
|
|
|
|
uint32_t default_sclk;
|
2010-01-13 06:54:34 +08:00
|
|
|
uint32_t default_dispclk;
|
2013-03-23 03:59:10 +08:00
|
|
|
uint32_t current_dispclk;
|
2010-01-13 06:54:34 +08:00
|
|
|
uint32_t dp_extclk;
|
2011-06-09 01:01:11 +08:00
|
|
|
uint32_t max_pixel_clock;
|
2016-01-27 05:45:10 +08:00
|
|
|
uint32_t vco_freq;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2009-11-03 07:53:02 +08:00
|
|
|
/*
|
|
|
|
* Power management
|
|
|
|
*/
|
|
|
|
int radeon_pm_init(struct radeon_device *rdev);
|
2013-12-20 00:37:22 +08:00
|
|
|
int radeon_pm_late_init(struct radeon_device *rdev);
|
2010-03-11 23:01:17 +08:00
|
|
|
void radeon_pm_fini(struct radeon_device *rdev);
|
2009-12-23 06:02:16 +08:00
|
|
|
void radeon_pm_compute_clocks(struct radeon_device *rdev);
|
2010-05-08 03:10:16 +08:00
|
|
|
void radeon_pm_suspend(struct radeon_device *rdev);
|
|
|
|
void radeon_pm_resume(struct radeon_device *rdev);
|
2009-12-29 02:58:44 +08:00
|
|
|
void radeon_combios_get_power_modes(struct radeon_device *rdev);
|
|
|
|
void radeon_atombios_get_power_modes(struct radeon_device *rdev);
|
2013-04-08 18:41:31 +08:00
|
|
|
int radeon_atom_get_clock_dividers(struct radeon_device *rdev,
|
|
|
|
u8 clock_type,
|
|
|
|
u32 clock,
|
|
|
|
bool strobe_mode,
|
|
|
|
struct atom_clock_dividers *dividers);
|
2013-02-14 05:38:25 +08:00
|
|
|
int radeon_atom_get_memory_pll_dividers(struct radeon_device *rdev,
|
|
|
|
u32 clock,
|
|
|
|
bool strobe_mode,
|
|
|
|
struct atom_mpll_param *mpll_param);
|
2011-04-13 02:49:23 +08:00
|
|
|
void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type);
|
2013-06-24 22:50:34 +08:00
|
|
|
int radeon_atom_get_voltage_gpio_settings(struct radeon_device *rdev,
|
|
|
|
u16 voltage_level, u8 voltage_type,
|
|
|
|
u32 *gpio_value, u32 *gpio_mask);
|
|
|
|
void radeon_atom_set_engine_dram_timings(struct radeon_device *rdev,
|
|
|
|
u32 eng_clock, u32 mem_clock);
|
|
|
|
int radeon_atom_get_voltage_step(struct radeon_device *rdev,
|
|
|
|
u8 voltage_type, u16 *voltage_step);
|
2013-04-13 02:04:10 +08:00
|
|
|
int radeon_atom_get_max_vddc(struct radeon_device *rdev, u8 voltage_type,
|
|
|
|
u16 voltage_id, u16 *voltage);
|
2013-02-20 06:14:43 +08:00
|
|
|
int radeon_atom_get_leakage_vddc_based_on_leakage_idx(struct radeon_device *rdev,
|
|
|
|
u16 *voltage,
|
|
|
|
u16 leakage_idx);
|
2013-08-14 13:03:41 +08:00
|
|
|
int radeon_atom_get_leakage_id_from_vbios(struct radeon_device *rdev,
|
|
|
|
u16 *leakage_id);
|
|
|
|
int radeon_atom_get_leakage_vddc_based_on_leakage_params(struct radeon_device *rdev,
|
|
|
|
u16 *vddc, u16 *vddci,
|
|
|
|
u16 virtual_voltage_id,
|
|
|
|
u16 vbios_voltage_id);
|
2014-08-01 05:57:42 +08:00
|
|
|
int radeon_atom_get_voltage_evv(struct radeon_device *rdev,
|
|
|
|
u16 virtual_voltage_id,
|
|
|
|
u16 *voltage);
|
2013-06-24 22:50:34 +08:00
|
|
|
int radeon_atom_round_to_true_voltage(struct radeon_device *rdev,
|
|
|
|
u8 voltage_type,
|
|
|
|
u16 nominal_voltage,
|
|
|
|
u16 *true_voltage);
|
|
|
|
int radeon_atom_get_min_voltage(struct radeon_device *rdev,
|
|
|
|
u8 voltage_type, u16 *min_voltage);
|
|
|
|
int radeon_atom_get_max_voltage(struct radeon_device *rdev,
|
|
|
|
u8 voltage_type, u16 *max_voltage);
|
|
|
|
int radeon_atom_get_voltage_table(struct radeon_device *rdev,
|
2013-02-14 06:29:54 +08:00
|
|
|
u8 voltage_type, u8 voltage_mode,
|
2013-06-24 22:50:34 +08:00
|
|
|
struct atom_voltage_table *voltage_table);
|
2013-02-14 06:04:59 +08:00
|
|
|
bool radeon_atom_is_voltage_gpio(struct radeon_device *rdev,
|
|
|
|
u8 voltage_type, u8 voltage_mode);
|
2014-06-07 06:43:45 +08:00
|
|
|
int radeon_atom_get_svi2_info(struct radeon_device *rdev,
|
|
|
|
u8 voltage_type,
|
|
|
|
u8 *svd_gpio_id, u8 *svc_gpio_id);
|
2013-06-24 22:50:34 +08:00
|
|
|
void radeon_atom_update_memory_dll(struct radeon_device *rdev,
|
|
|
|
u32 mem_clock);
|
|
|
|
void radeon_atom_set_ac_timing(struct radeon_device *rdev,
|
|
|
|
u32 mem_clock);
|
|
|
|
int radeon_atom_init_mc_reg_table(struct radeon_device *rdev,
|
|
|
|
u8 module_index,
|
|
|
|
struct atom_mc_reg_table *reg_table);
|
|
|
|
int radeon_atom_get_memory_info(struct radeon_device *rdev,
|
|
|
|
u8 module_index, struct atom_memory_info *mem_info);
|
|
|
|
int radeon_atom_get_mclk_range_table(struct radeon_device *rdev,
|
|
|
|
bool gddr5, u8 module_index,
|
|
|
|
struct atom_memory_clock_range_table *mclk_range_table);
|
|
|
|
int radeon_atom_get_max_vddc(struct radeon_device *rdev, u8 voltage_type,
|
|
|
|
u16 voltage_id, u16 *voltage);
|
2010-07-01 00:02:03 +08:00
|
|
|
void rs690_pm_info(struct radeon_device *rdev);
|
2011-12-17 06:03:42 +08:00
|
|
|
extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw,
|
|
|
|
unsigned *bankh, unsigned *mtaspect,
|
|
|
|
unsigned *tile_split);
|
2009-09-08 08:10:24 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Fences.
|
|
|
|
*/
|
|
|
|
struct radeon_fence_driver {
|
2014-08-27 21:21:58 +08:00
|
|
|
struct radeon_device *rdev;
|
2009-06-05 20:42:42 +08:00
|
|
|
uint32_t scratch_reg;
|
2011-11-21 04:45:34 +08:00
|
|
|
uint64_t gpu_addr;
|
|
|
|
volatile uint32_t *cpu_addr;
|
2012-05-10 21:57:31 +08:00
|
|
|
/* sync_seq is protected by ring emission lock */
|
|
|
|
uint64_t sync_seq[RADEON_NUM_RINGS];
|
2012-05-09 21:34:46 +08:00
|
|
|
atomic64_t last_seq;
|
2014-01-09 18:03:12 +08:00
|
|
|
bool initialized, delayed_irq;
|
2014-08-27 21:21:58 +08:00
|
|
|
struct delayed_work lockup_work;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_fence {
|
2016-10-25 20:00:45 +08:00
|
|
|
struct dma_fence base;
|
2014-01-09 18:03:12 +08:00
|
|
|
|
2014-11-19 21:01:24 +08:00
|
|
|
struct radeon_device *rdev;
|
|
|
|
uint64_t seq;
|
2011-08-26 01:39:48 +08:00
|
|
|
/* RB, DMA, etc. */
|
2014-11-19 21:01:24 +08:00
|
|
|
unsigned ring;
|
|
|
|
bool is_vm_update;
|
2014-01-09 18:03:12 +08:00
|
|
|
|
2014-11-19 21:01:24 +08:00
|
|
|
wait_queue_t fence_wake;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2011-11-21 04:45:34 +08:00
|
|
|
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
|
|
|
|
int radeon_fence_driver_init(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
void radeon_fence_driver_fini(struct radeon_device *rdev);
|
2014-08-27 21:21:56 +08:00
|
|
|
void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring);
|
2012-05-08 20:24:01 +08:00
|
|
|
int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
|
2011-08-26 01:39:48 +08:00
|
|
|
void radeon_fence_process(struct radeon_device *rdev, int ring);
|
2009-06-05 20:42:42 +08:00
|
|
|
bool radeon_fence_signaled(struct radeon_fence *fence);
|
2016-02-08 05:51:12 +08:00
|
|
|
long radeon_fence_wait_timeout(struct radeon_fence *fence, bool interruptible, long timeout);
|
2009-06-05 20:42:42 +08:00
|
|
|
int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
|
2014-02-18 22:58:31 +08:00
|
|
|
int radeon_fence_wait_next(struct radeon_device *rdev, int ring);
|
|
|
|
int radeon_fence_wait_empty(struct radeon_device *rdev, int ring);
|
2012-05-09 21:34:55 +08:00
|
|
|
int radeon_fence_wait_any(struct radeon_device *rdev,
|
|
|
|
struct radeon_fence **fences,
|
|
|
|
bool intr);
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
|
|
|
|
void radeon_fence_unref(struct radeon_fence **fence);
|
2012-05-09 21:34:47 +08:00
|
|
|
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
|
2012-05-10 21:57:31 +08:00
|
|
|
bool radeon_fence_need_sync(struct radeon_fence *fence, int ring);
|
|
|
|
void radeon_fence_note_sync(struct radeon_fence *fence, int ring);
|
|
|
|
static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a,
|
|
|
|
struct radeon_fence *b)
|
|
|
|
{
|
|
|
|
if (!a) {
|
|
|
|
return b;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!b) {
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
|
|
|
|
BUG_ON(a->ring != b->ring);
|
|
|
|
|
|
|
|
if (a->seq > b->seq) {
|
|
|
|
return a;
|
|
|
|
} else {
|
|
|
|
return b;
|
|
|
|
}
|
|
|
|
}
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2012-08-09 22:21:08 +08:00
|
|
|
static inline bool radeon_fence_is_earlier(struct radeon_fence *a,
|
|
|
|
struct radeon_fence *b)
|
|
|
|
{
|
|
|
|
if (!a) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!b) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
BUG_ON(a->ring != b->ring);
|
|
|
|
|
|
|
|
return a->seq < b->seq;
|
|
|
|
}
|
|
|
|
|
2009-06-24 07:48:08 +08:00
|
|
|
/*
|
|
|
|
* Tiling registers
|
|
|
|
*/
|
|
|
|
/* One tiling surface register slot; holds only a buffer-object pointer. */
struct radeon_surface_reg {
	struct radeon_bo *bo;
};

/* NOTE(review): presumably the number of surface register slots - confirm. */
#define RADEON_GEM_MAX_SURFACES 8
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
2009-11-20 21:29:23 +08:00
|
|
|
* TTM.
|
2009-06-05 20:42:42 +08:00
|
|
|
*/
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_mman {
|
|
|
|
struct ttm_bo_global_ref bo_global_ref;
|
2010-03-09 08:56:52 +08:00
|
|
|
struct drm_global_reference mem_global_ref;
|
2009-11-20 21:29:23 +08:00
|
|
|
struct ttm_bo_device bdev;
|
2009-12-12 03:36:19 +08:00
|
|
|
bool mem_global_referenced;
|
|
|
|
bool initialized;
|
2013-12-19 04:07:39 +08:00
|
|
|
|
|
|
|
#if defined(CONFIG_DEBUG_FS)
|
|
|
|
struct dentry *vram;
|
2013-12-19 04:07:40 +08:00
|
|
|
struct dentry *gtt;
|
2013-12-19 04:07:39 +08:00
|
|
|
#endif
|
2009-11-20 21:29:23 +08:00
|
|
|
};
|
|
|
|
|
2014-11-27 21:48:42 +08:00
|
|
|
struct radeon_bo_list {
|
|
|
|
struct radeon_bo *robj;
|
|
|
|
struct ttm_validate_buffer tv;
|
|
|
|
uint64_t gpu_offset;
|
|
|
|
unsigned prefered_domains;
|
|
|
|
unsigned allowed_domains;
|
|
|
|
uint32_t tiling_flags;
|
|
|
|
};
|
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* bo virtual address in a specific vm */
|
|
|
|
struct radeon_bo_va {
|
2012-09-11 22:10:04 +08:00
|
|
|
/* protected by bo being reserved */
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
struct list_head bo_list;
|
|
|
|
uint32_t flags;
|
2014-11-19 21:01:26 +08:00
|
|
|
struct radeon_fence *last_pt_update;
|
2012-09-11 22:10:04 +08:00
|
|
|
unsigned ref_count;
|
|
|
|
|
|
|
|
/* protected by vm mutex */
|
2014-07-30 23:49:56 +08:00
|
|
|
struct interval_tree_node it;
|
2014-07-18 14:56:40 +08:00
|
|
|
struct list_head vm_status;
|
2012-09-11 22:10:04 +08:00
|
|
|
|
|
|
|
/* constant after initialization */
|
|
|
|
struct radeon_vm *vm;
|
|
|
|
struct radeon_bo *bo;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
};
|
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo {
|
|
|
|
/* Protected by gem.mutex */
|
|
|
|
struct list_head list;
|
|
|
|
/* Protected by tbo.reserved */
|
2014-03-02 07:56:17 +08:00
|
|
|
u32 initial_domain;
|
2014-10-10 11:28:36 +08:00
|
|
|
struct ttm_place placements[4];
|
2009-12-07 22:52:58 +08:00
|
|
|
struct ttm_placement placement;
|
2009-11-20 21:29:23 +08:00
|
|
|
struct ttm_buffer_object tbo;
|
|
|
|
struct ttm_bo_kmap_obj kmap;
|
2014-07-17 18:01:08 +08:00
|
|
|
u32 flags;
|
2009-11-20 21:29:23 +08:00
|
|
|
unsigned pin_count;
|
|
|
|
void *kptr;
|
|
|
|
u32 tiling_flags;
|
|
|
|
u32 pitch;
|
|
|
|
int surface_reg;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* list of all virtual addresses to which this bo
|
|
|
|
* is associated
|
|
|
|
*/
|
|
|
|
struct list_head va;
|
2009-11-20 21:29:23 +08:00
|
|
|
/* Constant after initialization */
|
|
|
|
struct radeon_device *rdev;
|
2011-02-19 00:59:16 +08:00
|
|
|
struct drm_gem_object gem_base;
|
2012-05-31 20:52:53 +08:00
|
|
|
|
2013-04-26 10:29:27 +08:00
|
|
|
struct ttm_bo_kmap_obj dma_buf_vmap;
|
|
|
|
pid_t pid;
|
2014-08-07 15:36:03 +08:00
|
|
|
|
|
|
|
struct radeon_mn *mn;
|
2015-03-31 23:37:00 +08:00
|
|
|
struct list_head mn_list;
|
2009-11-20 21:29:23 +08:00
|
|
|
};
|
2011-02-19 00:59:17 +08:00
|
|
|
/*
 * Given a pointer to the gem_base member embedded in a struct radeon_bo,
 * recover a pointer to the enclosing struct radeon_bo via container_of().
 * gobj must point at the gem_base field of a radeon_bo.
 */
#define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base)
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2013-04-26 10:29:27 +08:00
|
|
|
int radeon_gem_debugfs_init(struct radeon_device *rdev);
|
|
|
|
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specialized sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also converts current ib pool to use
the sub allocator. Idea is that ib pool buffer can be shared with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
/* sub-allocation manager, it has to be protected by another lock.
|
|
|
|
* By design this is a helper for other parts of the driver
|
|
|
|
* like the indirect buffer or semaphore, which both have their own
|
|
|
|
* locking.
|
|
|
|
*
|
|
|
|
* The principle is simple: we keep a list of sub-allocations in offset
|
|
|
|
* order (first entry has offset == 0, last entry has the highest
|
|
|
|
* offset).
|
|
|
|
*
|
|
|
|
* When allocating new object we first check if there is room at
|
|
|
|
* the end total_size - (last_object_offset + last_object_size) >=
|
|
|
|
* alloc_size. If so we allocate new object there.
|
|
|
|
*
|
|
|
|
* When there is not enough room at the end, we start waiting for
|
|
|
|
* each sub object until we reach object_offset+object_size >=
|
|
|
|
* alloc_size, this object then becomes the sub object we return.
|
|
|
|
*
|
|
|
|
* Alignment can't be bigger than page size.
|
|
|
|
*
|
|
|
|
* Holes are not considered for allocation to keep things simple.
|
|
|
|
* The assumption is that there won't be holes (all objects on the same
|
|
|
|
* alignment).
|
|
|
|
*/
|
|
|
|
struct radeon_sa_manager {
|
2012-07-12 03:07:57 +08:00
|
|
|
wait_queue_head_t wq;
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specialized sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also converts current ib pool to use
the sub allocator. Idea is that ib pool buffer can be shared with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
struct radeon_bo *bo;
|
2012-05-09 21:34:56 +08:00
|
|
|
struct list_head *hole;
|
|
|
|
struct list_head flist[RADEON_NUM_RINGS];
|
|
|
|
struct list_head olist;
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specialized sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also converts current ib pool to use
the sub allocator. Idea is that ib pool buffer can be shared with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
unsigned size;
|
|
|
|
uint64_t gpu_addr;
|
|
|
|
void *cpu_ptr;
|
|
|
|
uint32_t domain;
|
2013-07-13 03:46:09 +08:00
|
|
|
uint32_t align;
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specialized sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also converts current ib pool to use
the sub allocator. Idea is that ib pool buffer can be shared with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_sa_bo;
|
|
|
|
|
|
|
|
/* sub-allocation buffer */
|
|
|
|
struct radeon_sa_bo {
|
2012-05-09 21:34:56 +08:00
|
|
|
struct list_head olist;
|
|
|
|
struct list_head flist;
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specialized sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also converts current ib pool to use
the sub allocator. Idea is that ib pool buffer can be shared with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
struct radeon_sa_manager *manager;
|
2012-05-09 21:34:52 +08:00
|
|
|
unsigned soffset;
|
|
|
|
unsigned eoffset;
|
2012-05-09 21:34:54 +08:00
|
|
|
struct radeon_fence *fence;
|
drm/radeon: introduce a sub allocator and convert ib pool to it v4
Somewhat specialized sub-allocator designed to perform sub-allocation
for command buffer not only for current cs ioctl but for future command
submission ioctl as well. Patch also converts current ib pool to use
the sub allocator. Idea is that ib pool buffer can be shared with other
command buffer submission not having 64K granularity.
v2 Harmonize pool handling and add suspend/resume callback to pin/unpin
sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood, cayman,
rs480, rs690, rs880)
v3 Simplify allocator
v4 Fix radeon_ib_get error path to properly free fence
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-11-16 00:48:34 +08:00
|
|
|
};
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* GEM objects.
|
|
|
|
*/
|
|
|
|
/*
 * GEM bookkeeping: one mutex and one list head.
 */
struct radeon_gem {
|
2009-11-20 21:29:23 +08:00
|
|
|
/* struct radeon_bo marks its 'list' member as "Protected by gem.mutex" */
struct mutex mutex;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* NOTE(review): presumably the head that radeon_bo.list entries link into -- confirm */
struct list_head objects;
|
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_gem_init(struct radeon_device *rdev);
|
|
|
|
void radeon_gem_fini(struct radeon_device *rdev);
|
2014-07-18 00:26:29 +08:00
|
|
|
int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size,
|
2009-11-20 21:29:23 +08:00
|
|
|
int alignment, int initial_domain,
|
2014-07-21 19:27:27 +08:00
|
|
|
u32 flags, bool kernel,
|
2009-11-20 21:29:23 +08:00
|
|
|
struct drm_gem_object **obj);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2011-02-07 10:16:14 +08:00
|
|
|
int radeon_mode_dumb_create(struct drm_file *file_priv,
|
|
|
|
struct drm_device *dev,
|
|
|
|
struct drm_mode_create_dumb *args);
|
|
|
|
int radeon_mode_dumb_mmap(struct drm_file *filp,
|
|
|
|
struct drm_device *dev,
|
|
|
|
uint32_t handle, uint64_t *offset_p);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2011-12-22 01:13:47 +08:00
|
|
|
/*
|
|
|
|
* Semaphores.
|
|
|
|
*/
|
|
|
|
/*
 * A semaphore object: a sub-allocation handle, a signed waiter count and a
 * 64-bit GPU address.
 */
struct radeon_semaphore {
|
2014-11-19 21:01:22 +08:00
|
|
|
/* NOTE(review): presumably the sub-allocation backing this semaphore -- confirm */
struct radeon_sa_bo *sa_bo;
|
|
|
|
/* signed counter; NOTE(review): sign convention is not visible here -- confirm against users */
signed waiters;
|
|
|
|
/* NOTE(review): presumably the GPU address of the semaphore inside sa_bo -- confirm */
uint64_t gpu_addr;
|
2011-12-22 01:13:47 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_semaphore_create(struct radeon_device *rdev,
|
|
|
|
struct radeon_semaphore **semaphore);
|
2013-11-12 19:58:05 +08:00
|
|
|
bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
|
2011-12-22 01:13:47 +08:00
|
|
|
struct radeon_semaphore *semaphore);
|
2013-11-12 19:58:05 +08:00
|
|
|
bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
|
2011-12-22 01:13:47 +08:00
|
|
|
struct radeon_semaphore *semaphore);
|
|
|
|
void radeon_semaphore_free(struct radeon_device *rdev,
|
2012-05-10 22:46:43 +08:00
|
|
|
struct radeon_semaphore **semaphore,
|
2012-05-09 21:34:57 +08:00
|
|
|
struct radeon_fence *fence);
|
2011-12-22 01:13:47 +08:00
|
|
|
|
2014-11-19 21:01:22 +08:00
|
|
|
/*
|
|
|
|
* Synchronization
|
|
|
|
*/
|
|
|
|
/*
 * Synchronization state: an array of semaphore pointers, one fence pointer
 * per ring, and one extra fence pointer for VM updates.
 */
struct radeon_sync {
|
|
|
|
/* up to RADEON_NUM_SYNCS semaphore pointers */
struct radeon_semaphore *semaphores[RADEON_NUM_SYNCS];
|
|
|
|
/* one fence pointer per ring (RADEON_NUM_RINGS entries); NOTE(review): presumably the fence to wait on for that ring -- confirm */
struct radeon_fence *sync_to[RADEON_NUM_RINGS];
|
2014-11-19 21:01:24 +08:00
|
|
|
/* NOTE(review): presumably the most recent VM update fence to sync with -- confirm */
struct radeon_fence *last_vm_update;
|
2014-11-19 21:01:22 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
void radeon_sync_create(struct radeon_sync *sync);
|
|
|
|
void radeon_sync_fence(struct radeon_sync *sync,
|
|
|
|
struct radeon_fence *fence);
|
|
|
|
int radeon_sync_resv(struct radeon_device *rdev,
|
|
|
|
struct radeon_sync *sync,
|
|
|
|
struct reservation_object *resv,
|
|
|
|
bool shared);
|
|
|
|
int radeon_sync_rings(struct radeon_device *rdev,
|
|
|
|
struct radeon_sync *sync,
|
|
|
|
int waiting_ring);
|
|
|
|
void radeon_sync_free(struct radeon_device *rdev, struct radeon_sync *sync,
|
|
|
|
struct radeon_fence *fence);
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* GART structures, functions & helpers
|
|
|
|
*/
|
|
|
|
struct radeon_mc;
|
|
|
|
|
2009-10-14 12:34:41 +08:00
|
|
|
/* GPU page size (presumably in bytes); a power of two, equal to 1 << 12. */
#define RADEON_GPU_PAGE_SIZE 4096
|
drm/radeon/kms: simplify memory controller setup V2
Get rid of _location and use _start/_end also simplify the
computation of vram_start|end & gtt_start|end. For R1XX-R2XX
we place VRAM at the same address of PCI aperture, those GPU
shouldn't have much memory and seems to behave better when
setup that way. For R3XX and newer we place VRAM at 0. For
R6XX-R7XX AGP we place VRAM before or after AGP aperture this
might limit the VRAM size but it's very unlikely.
For IGP we don't change the VRAM placement.
Tested on (compiz,quake3,suspend/resume):
PCI/PCIE:RV280,R420,RV515,RV570,RV610,RV710
AGP:RV100,RV280,R420,RV350,RV620(RPB*),RV730
IGP:RS480(RPB*),RS690,RS780(RPB*),RS880
RPB: resume previously broken
V2 correct commit message to reflect more accurately the bug
and move VRAM placement to 0 for most of the GPU to avoid
limiting VRAM.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-18 05:54:29 +08:00
|
|
|
/* Mask of the low-order (offset within a GPU page) bits: page size minus one. */
#define RADEON_GPU_PAGE_MASK (RADEON_GPU_PAGE_SIZE - 1)
|
2011-09-17 00:04:08 +08:00
|
|
|
/* log2 of the GPU page size: 1 << 12 == 4096. Keep in sync with RADEON_GPU_PAGE_SIZE. */
#define RADEON_GPU_PAGE_SHIFT 12
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/*
 * Round a up to the next multiple of the GPU page size: add the page mask,
 * then clear the low-order bits. A value that is already page aligned is
 * returned unchanged. The argument is evaluated exactly once.
 */
#define RADEON_GPU_PAGE_ALIGN(a) (((a) + RADEON_GPU_PAGE_MASK) & ~RADEON_GPU_PAGE_MASK)
|
2009-10-14 12:34:41 +08:00
|
|
|
|
2014-07-17 18:01:07 +08:00
|
|
|
#define RADEON_GART_PAGE_DUMMY 0
|
|
|
|
#define RADEON_GART_PAGE_VALID (1 << 0)
|
|
|
|
#define RADEON_GART_PAGE_READ (1 << 1)
|
|
|
|
#define RADEON_GART_PAGE_WRITE (1 << 2)
|
|
|
|
#define RADEON_GART_PAGE_SNOOP (1 << 3)
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_gart {
|
|
|
|
dma_addr_t table_addr;
|
2011-11-03 23:16:49 +08:00
|
|
|
struct radeon_bo *robj;
|
|
|
|
void *ptr;
|
2009-06-05 20:42:42 +08:00
|
|
|
unsigned num_gpu_pages;
|
|
|
|
unsigned num_cpu_pages;
|
|
|
|
unsigned table_size;
|
|
|
|
struct page **pages;
|
2015-01-21 16:36:35 +08:00
|
|
|
uint64_t *pages_entry;
|
2009-06-05 20:42:42 +08:00
|
|
|
bool ready;
|
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_gart_table_ram_alloc(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_table_ram_free(struct radeon_device *rdev);
|
|
|
|
int radeon_gart_table_vram_alloc(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_table_vram_free(struct radeon_device *rdev);
|
2011-11-03 23:16:49 +08:00
|
|
|
int radeon_gart_table_vram_pin(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_table_vram_unpin(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
int radeon_gart_init(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_fini(struct radeon_device *rdev);
|
|
|
|
void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
|
|
|
|
int pages);
|
|
|
|
int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
|
2010-12-03 00:04:29 +08:00
|
|
|
int pages, struct page **pagelist,
|
2014-07-17 18:01:07 +08:00
|
|
|
dma_addr_t *dma_addr, uint32_t flags);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* GPU MC structures, functions & helpers
|
|
|
|
*/
|
|
|
|
struct radeon_mc {
|
|
|
|
resource_size_t aper_size;
|
|
|
|
resource_size_t aper_base;
|
|
|
|
resource_size_t agp_base;
|
2009-07-21 18:39:30 +08:00
|
|
|
/* for some chips with <= 32MB we need to lie
|
|
|
|
* about vram size near mc fb location */
|
2009-09-08 08:10:24 +08:00
|
|
|
u64 mc_vram_size;
|
drm/radeon/kms: simplify memory controller setup V2
Get rid of _location and use _start/_end also simplify the
computation of vram_start|end & gtt_start|end. For R1XX-R2XX
we place VRAM at the same address of PCI aperture, those GPU
shouldn't have much memory and seems to behave better when
setup that way. For R3XX and newer we place VRAM at 0. For
R6XX-R7XX AGP we place VRAM before or after AGP aperture this
might limit the VRAM size but it's very unlikely.
For IGP we don't change the VRAM placement.
Tested on (compiz,quake3,suspend/resume):
PCI/PCIE:RV280,R420,RV515,RV570,RV610,RV710
AGP:RV100,RV280,R420,RV350,RV620(RPB*),RV730
IGP:RS480(RPB*),RS690,RS780(RPB*),RS880
RPB: resume previously broken
V2 correct commit message to reflect more accurately the bug
and move VRAM placement to 0 for most of the GPU to avoid
limiting VRAM.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-18 05:54:29 +08:00
|
|
|
u64 visible_vram_size;
|
2009-09-08 08:10:24 +08:00
|
|
|
u64 gtt_size;
|
|
|
|
u64 gtt_start;
|
|
|
|
u64 gtt_end;
|
|
|
|
u64 vram_start;
|
|
|
|
u64 vram_end;
|
2009-06-05 20:42:42 +08:00
|
|
|
unsigned vram_width;
|
2009-09-08 08:10:24 +08:00
|
|
|
u64 real_vram_size;
|
2009-06-05 20:42:42 +08:00
|
|
|
int vram_mtrr;
|
|
|
|
bool vram_is_ddr;
|
drm/radeon/kms: simplify memory controller setup V2
Get rid of _location and use _start/_end also simplify the
computation of vram_start|end & gtt_start|end. For R1XX-R2XX
we place VRAM at the same address of PCI aperture, those GPU
shouldn't have much memory and seems to behave better when
setup that way. For R3XX and newer we place VRAM at 0. For
R6XX-R7XX AGP we place VRAM before or after AGP aperture this
might limit the VRAM size but it's very unlikely.
For IGP we don't change the VRAM placement.
Tested on (compiz,quake3,suspend/resume):
PCI/PCIE:RV280,R420,RV515,RV570,RV610,RV710
AGP:RV100,RV280,R420,RV350,RV620(RPB*),RV730
IGP:RS480(RPB*),RS690,RS780(RPB*),RS880
RPB: resume previously broken
V2 correct commit message to reflect more accurately the bug
and move VRAM placement to 0 for most of the GPU to avoid
limiting VRAM.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-18 05:54:29 +08:00
|
|
|
bool igp_sideport_enabled;
|
2010-07-15 22:51:10 +08:00
|
|
|
u64 gtt_base_align;
|
2013-04-08 23:13:01 +08:00
|
|
|
u64 mc_mask;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2010-01-06 00:27:29 +08:00
|
|
|
bool radeon_combios_sideport_present(struct radeon_device *rdev);
|
|
|
|
bool radeon_atombios_sideport_present(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* GPU scratch registers structures, functions & helpers
|
|
|
|
*/
|
|
|
|
/*
 * Scratch register bookkeeping: a count, a base value and two parallel
 * 32-entry arrays.
 */
struct radeon_scratch {
|
|
|
|
/* NOTE(review): presumably how many of the 32 slots below are in use on this asic -- confirm */
unsigned num_reg;
|
2010-08-28 06:25:25 +08:00
|
|
|
/* NOTE(review): presumably the register offset of the first scratch register -- confirm */
uint32_t reg_base;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* per-slot flag, 32 slots; NOTE(review): presumably true when the slot is available -- confirm */
bool free[32];
|
|
|
|
/* per-slot 32-bit value, 32 slots; NOTE(review): presumably the register offset for the slot -- confirm */
uint32_t reg[32];
|
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg);
|
|
|
|
void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg);
|
|
|
|
|
2013-03-05 01:47:46 +08:00
|
|
|
/*
|
|
|
|
* GPU doorbell structures, functions & helpers
|
|
|
|
*/
|
2013-11-14 04:54:17 +08:00
|
|
|
#define RADEON_MAX_DOORBELLS 1024 /* Reserve at most 1024 doorbell slots for radeon-owned rings. */
|
|
|
|
|
2013-03-05 01:47:46 +08:00
|
|
|
/*
 * Doorbell aperture description (base, size, mapping pointer) plus a
 * bitmap sized for RADEON_MAX_DOORBELLS slots.
 * NOTE(review): presumably the state behind radeon_doorbell_get() /
 * radeon_doorbell_free() -- confirm against their implementation.
 */
struct radeon_doorbell {
|
|
|
|
/* doorbell mmio */
|
2013-11-14 04:54:17 +08:00
|
|
|
resource_size_t base;
|
|
|
|
resource_size_t size;
|
|
|
|
/* I/O-memory pointer (__iomem); presumably the CPU mapping of the aperture at base -- confirm */
u32 __iomem *ptr;
|
|
|
|
u32 num_doorbells; /* Number of doorbells actually reserved for radeon. */
|
2015-05-20 09:37:52 +08:00
|
|
|
/* bitmap of RADEON_MAX_DOORBELLS bits; presumably one bit per doorbell slot in use -- confirm */
DECLARE_BITMAP(used, RADEON_MAX_DOORBELLS);
|
2013-03-05 01:47:46 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_doorbell_get(struct radeon_device *rdev, u32 *page);
|
|
|
|
void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell);
|
2014-01-28 20:43:19 +08:00
|
|
|
void radeon_doorbell_get_kfd_info(struct radeon_device *rdev,
|
|
|
|
phys_addr_t *aperture_base,
|
|
|
|
size_t *aperture_size,
|
|
|
|
size_t *start_offset);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* IRQS.
|
|
|
|
*/
|
2010-11-21 23:59:01 +08:00
|
|
|
|
2014-06-04 06:13:21 +08:00
|
|
|
/*
 * State for a page flip: two work items (flip_work and unpin_work)
 * together with the device, a CRTC index and the objects involved.
 * NOTE(review): the field notes below are inferred from names only;
 * confirm against the code that queues these work items.
 */
struct radeon_flip_work {
|
|
|
|
struct work_struct flip_work;
|
|
|
|
struct work_struct unpin_work;
|
|
|
|
struct radeon_device *rdev;
|
|
|
|
int crtc_id;
|
2016-08-04 11:39:39 +08:00
|
|
|
/* presumably the vblank count at which the flip should take effect -- TODO confirm */
u32 target_vblank;
|
2014-07-14 14:48:42 +08:00
|
|
|
/* NOTE(review): looks like the 64-bit scanout address of the new buffer -- confirm */
uint64_t base;
|
2010-11-21 23:59:01 +08:00
|
|
|
struct drm_pending_vblank_event *event;
|
2014-06-04 06:13:21 +08:00
|
|
|
/* presumably the previously displayed buffer object, released by unpin_work -- TODO confirm */
struct radeon_bo *old_rbo;
|
2016-10-25 20:00:45 +08:00
|
|
|
struct dma_fence *fence;
|
2016-04-01 17:51:34 +08:00
|
|
|
bool async;
|
2010-11-21 23:59:01 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * r500 member of union radeon_irq_stat_regs: two u32 status values.
 * NOTE(review): presumably copies of the hardware interrupt status
 * registers of the same names -- confirm against the IRQ handler.
 */
struct r500_irq_stat_regs {
|
|
|
|
u32 disp_int;
|
2012-03-30 20:59:57 +08:00
|
|
|
u32 hdmi0_status;
|
2010-11-21 23:59:01 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * r600 member of union radeon_irq_stat_regs: three display interrupt
 * words, two dNgrph words and two HDMI status words, all u32.
 * NOTE(review): presumably copies of the hardware interrupt status
 * registers of the same names -- confirm against the IRQ handler.
 */
struct r600_irq_stat_regs {
|
|
|
|
u32 disp_int;
|
|
|
|
u32 disp_int_cont;
|
|
|
|
u32 disp_int_cont2;
|
|
|
|
u32 d1grph_int;
|
|
|
|
u32 d2grph_int;
|
2012-03-30 20:59:57 +08:00
|
|
|
u32 hdmi0_status;
|
|
|
|
u32 hdmi1_status;
|
2010-11-21 23:59:01 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * evergreen member of union radeon_irq_stat_regs: six display interrupt
 * words, six dNgrph words and six afmt status words, all u32.
 * NOTE(review): presumably copies of the hardware interrupt status
 * registers of the same names -- confirm against the IRQ handler.
 */
struct evergreen_irq_stat_regs {
|
|
|
|
u32 disp_int;
|
|
|
|
u32 disp_int_cont;
|
|
|
|
u32 disp_int_cont2;
|
|
|
|
u32 disp_int_cont3;
|
|
|
|
u32 disp_int_cont4;
|
|
|
|
u32 disp_int_cont5;
|
|
|
|
u32 d1grph_int;
|
|
|
|
u32 d2grph_int;
|
|
|
|
u32 d3grph_int;
|
|
|
|
u32 d4grph_int;
|
|
|
|
u32 d5grph_int;
|
|
|
|
u32 d6grph_int;
|
2012-03-30 20:59:57 +08:00
|
|
|
u32 afmt_status1;
|
|
|
|
u32 afmt_status2;
|
|
|
|
u32 afmt_status3;
|
|
|
|
u32 afmt_status4;
|
|
|
|
u32 afmt_status5;
|
|
|
|
u32 afmt_status6;
|
2010-11-21 23:59:01 +08:00
|
|
|
};
|
|
|
|
|
2012-11-09 23:45:57 +08:00
|
|
|
/*
 * cik member of union radeon_irq_stat_regs: seven display interrupt
 * words and six dNgrph words, all u32.
 * NOTE(review): presumably copies of the hardware interrupt status
 * registers of the same names -- confirm against the IRQ handler.
 */
struct cik_irq_stat_regs {
|
|
|
|
u32 disp_int;
|
|
|
|
u32 disp_int_cont;
|
|
|
|
u32 disp_int_cont2;
|
|
|
|
u32 disp_int_cont3;
|
|
|
|
u32 disp_int_cont4;
|
|
|
|
u32 disp_int_cont5;
|
|
|
|
u32 disp_int_cont6;
|
2014-04-24 02:46:06 +08:00
|
|
|
u32 d1grph_int;
|
|
|
|
u32 d2grph_int;
|
|
|
|
u32 d3grph_int;
|
|
|
|
u32 d4grph_int;
|
|
|
|
u32 d5grph_int;
|
|
|
|
u32 d6grph_int;
|
2012-11-09 23:45:57 +08:00
|
|
|
};
|
|
|
|
|
2010-11-21 23:59:01 +08:00
|
|
|
/*
 * Overlay of the per-family interrupt status sets. Because this is a
 * union the members share storage, so only one of them is meaningful
 * at a time. struct radeon_irq embeds it as stat_regs.
 */
union radeon_irq_stat_regs {
|
|
|
|
struct r500_irq_stat_regs r500;
|
|
|
|
struct r600_irq_stat_regs r600;
|
|
|
|
struct evergreen_irq_stat_regs evergreen;
|
2012-11-09 23:45:57 +08:00
|
|
|
struct cik_irq_stat_regs cik;
|
2010-11-21 23:59:01 +08:00
|
|
|
};
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Interrupt bookkeeping: per-ring and per-CRTC counters/flags, per-pin
 * and per-block enable flags, a vblank wait queue and the per-family
 * status values in stat_regs.
 * NOTE(review): the roles noted below are inferred from field names and
 * from the radeon_irq_kms_* prototypes -- confirm in the implementation.
 */
struct radeon_irq {
|
2012-05-17 07:33:30 +08:00
|
|
|
bool installed;
|
|
|
|
spinlock_t lock;
|
2012-05-18 01:52:00 +08:00
|
|
|
/* one atomic counter per ring; presumably a use count driven by radeon_irq_kms_sw_irq_get()/put() -- confirm */
atomic_t ring_int[RADEON_NUM_RINGS];
|
2012-05-17 07:33:30 +08:00
|
|
|
bool crtc_vblank_int[RADEON_MAX_CRTCS];
|
2012-05-18 01:52:00 +08:00
|
|
|
/* one atomic counter per CRTC; presumably a use count driven by radeon_irq_kms_pflip_irq_get()/put() -- confirm */
atomic_t pflip[RADEON_MAX_CRTCS];
|
2012-05-17 07:33:30 +08:00
|
|
|
wait_queue_head_t vblank_queue;
|
|
|
|
bool hpd[RADEON_MAX_HPD_PINS];
|
|
|
|
bool afmt[RADEON_MAX_AFMT_BLOCKS];
|
|
|
|
/* union: only the member matching the running hardware family is meaningful */
union radeon_irq_stat_regs stat_regs;
|
2013-04-13 02:04:10 +08:00
|
|
|
bool dpm_thermal;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
int radeon_irq_kms_init(struct radeon_device *rdev);
|
|
|
|
void radeon_irq_kms_fini(struct radeon_device *rdev);
|
2011-11-18 09:13:28 +08:00
|
|
|
void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring);
|
2014-01-09 18:03:12 +08:00
|
|
|
bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring);
|
2011-11-18 09:13:28 +08:00
|
|
|
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring);
|
2010-11-21 23:59:01 +08:00
|
|
|
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc);
|
|
|
|
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
|
2012-05-17 07:33:30 +08:00
|
|
|
void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block);
|
|
|
|
void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block);
|
|
|
|
void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask);
|
|
|
|
void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
2011-10-23 18:56:27 +08:00
|
|
|
* CP & rings.
|
2009-06-05 20:42:42 +08:00
|
|
|
*/
|
2011-08-26 01:39:48 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Descriptor of one IB (presumably "indirect buffer" -- confirm): its
 * backing sub-allocation, a GPU address and CPU pointer, a length, the
 * ring index, and the fence, VM and sync objects associated with it.
 */
struct radeon_ib {
|
2012-05-09 21:35:00 +08:00
|
|
|
struct radeon_sa_bo *sa_bo;
|
|
|
|
/* presumably the length in 32-bit dwords, going by the _dw suffix -- TODO confirm */
uint32_t length_dw;
|
|
|
|
/* NOTE(review): gpu_addr and ptr look like the GPU and CPU views of the same memory -- confirm */
uint64_t gpu_addr;
|
|
|
|
uint32_t *ptr;
|
2012-05-08 20:24:01 +08:00
|
|
|
int ring;
|
2012-05-09 21:35:00 +08:00
|
|
|
struct radeon_fence *fence;
|
2012-08-07 00:57:44 +08:00
|
|
|
struct radeon_vm *vm;
|
2012-05-09 21:35:00 +08:00
|
|
|
bool is_const_ib;
|
2014-11-19 21:01:22 +08:00
|
|
|
struct radeon_sync sync;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2011-10-23 18:56:27 +08:00
|
|
|
/*
 * Software state of one ring: the backing buffer object and its CPU
 * mapping, read/write pointer bookkeeping, size and masks, and the
 * extra fields used for CIK queues.
 * NOTE(review): the field notes below are inferred from names only;
 * confirm against the ring implementation before relying on them.
 */
struct radeon_ring {
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *ring_obj;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* volatile CPU pointer to the ring contents; presumably the mapping of ring_obj -- confirm */
volatile uint32_t *ring;
|
2011-10-13 18:48:45 +08:00
|
|
|
unsigned rptr_offs;
|
2012-07-06 22:22:55 +08:00
|
|
|
unsigned rptr_save_reg;
|
2012-07-18 02:02:31 +08:00
|
|
|
u64 next_rptr_gpu_addr;
|
|
|
|
volatile u32 *next_rptr_cpu_addr;
|
2009-06-05 20:42:42 +08:00
|
|
|
unsigned wptr;
|
|
|
|
unsigned wptr_old;
|
|
|
|
unsigned ring_size;
|
|
|
|
/* presumably the free space in dwords, going by the _dw suffix -- TODO confirm */
unsigned ring_free_dw;
|
|
|
|
int count_dw;
|
2014-02-18 22:24:06 +08:00
|
|
|
atomic_t last_rptr;
|
|
|
|
atomic64_t last_activity;
|
2009-06-05 20:42:42 +08:00
|
|
|
uint64_t gpu_addr;
|
|
|
|
uint32_t align_mask;
|
|
|
|
/* NOTE(review): looks like the mask used to wrap ring pointers -- confirm */
uint32_t ptr_mask;
|
|
|
|
bool ready;
|
2011-11-18 03:25:56 +08:00
|
|
|
/* presumably the no-op packet value used for padding -- TODO confirm */
u32 nop;
|
2012-07-18 02:02:30 +08:00
|
|
|
u32 idx;
|
2013-01-12 04:19:43 +08:00
|
|
|
u64 last_semaphore_signal_addr;
|
|
|
|
u64 last_semaphore_wait_addr;
|
2013-06-27 05:37:11 +08:00
|
|
|
/* for CIK queues */
|
|
|
|
u32 me;
|
|
|
|
u32 pipe;
|
|
|
|
u32 queue;
|
|
|
|
struct radeon_bo *mqd_obj;
|
2013-11-14 04:54:17 +08:00
|
|
|
u32 doorbell_index;
|
2013-06-27 05:37:11 +08:00
|
|
|
unsigned wptr_offs;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * MEC state: one buffer object with its 64-bit GPU address, plus three
 * counts (pipes, MECs, queues).
 * NOTE(review): MEC presumably stands for the CIK compute micro engine
 * and hpd_eop for its end-of-pipe buffer -- confirm.
 */
struct radeon_mec {
|
|
|
|
struct radeon_bo *hpd_eop_obj;
|
|
|
|
u64 hpd_eop_gpu_addr;
|
|
|
|
u32 num_pipe;
|
|
|
|
u32 num_mec;
|
|
|
|
u32 num_queue;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address spaces are per drm client (opener of /dev/drm).
Clients are in charge of their virtual address space; they need to
map bos into it by calling the DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define which ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/*
|
|
|
|
* VM
|
|
|
|
*/
|
2012-08-09 22:21:08 +08:00
|
|
|
|
drm/radeon: add 2-level VM pagetables support v9
PDE/PTE update code uses CP ring for memory writes.
All page table entries are preallocated for now in alloc_pt().
It is made as a whole because it's hard to divide it into several patches
that each compile and don't break anything when applied separately.
Tested on cayman card.
v2: rebased on top of "refactor set_page chipset interface v3",
code cleanups
v3: switched offsets calc macros to inline funcs where possible,
remove pd_addr from radeon_vm, switched RADEON_BLOCK_SIZE define,
to 9 (and PTE_COUNT to 1 << BLOCK_SIZE)
v4 (ck): move "incr" documentation to previous patch, cleanup and
document RADEON_VM_* constants, change commit message to
our usual format, simplify patch a lot by removing
everything current not necessary, disable SI workaround.
v5: (agd5f): Fix typo in tables_size calculation in
radeon_vm_alloc_pt(). Second line should have been
'+=' rather than '='.
v6: fix npdes calculation. In scenario when pfns to be mapped overlap
two PDE spans:
+-----------+-------------+
| PDE span | PDE span |
+-----------+----+--------+
| |
+---------+
| pfns |
+---------+
the following npdes calculation gives incorrect result:
npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 1;
For the case above picture it should give npdes = 2, but gives one.
This patch corrects it by rounding last pfn up to 512 border,
first - down to 512 border and then subtracting and dividing by 512.
v7: Make npde calculation clearer, fix ndw calculation.
v8: (agd5f): reserve enough for 2 full VM PTs, add some
additional comments.
v9: fix typo in npde calculation
Signed-off-by: Dmitry Cherkasov <Dmitrii.Cherkasov@amd.com>
Signed-off-by: Christian König <deathsimple@vodafone.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2012-09-18 01:36:19 +08:00
|
|
|
/* maximum number of VMIDs */
|
2012-08-09 22:21:08 +08:00
|
|
|
#define RADEON_NUM_VM 16
|
|
|
|
|
drm/radeon: add 2-level VM pagetables support v9
PDE/PTE update code uses CP ring for memory writes.
All page table entries are preallocated for now in alloc_pt().
It is made as whole because it's hard to divide it to several patches
that compile and doesn't break anything being applied separately.
Tested on cayman card.
v2: rebased on top of "refactor set_page chipset interface v3",
code cleanups
v3: switched offsets calc macros to inline funcs where possible,
remove pd_addr from radeon_vm, switched RADEON_BLOCK_SIZE define,
to 9 (and PTE_COUNT to 1 << BLOCK_SIZE)
v4 (ck): move "incr" documentation to previous patch, cleanup and
document RADEON_VM_* constants, change commit message to
our usual format, simplify patch allot by removing
everything current not necessary, disable SI workaround.
v5: (agd5f): Fix typo in tables_size calculation in
radeon_vm_alloc_pt(). Second line should have been
'+=' rather than '='.
v6: fix npdes calculation. In scenario when pfns to be mapped overlap
two PDE spans:
+-----------+-------------+
| PDE span | PDE span |
+-----------+----+--------+
| |
+---------+
| pfns |
+---------+
the following npdes calculation gives incorrect result:
npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 1;
For the case above picture it should give npdes = 2, but gives one.
This patch corrects it by rounding last pfn up to 512 border,
first - down to 512 border and then subtracting and dividing by 512.
v7: Make npde calculation clearer, fix ndw calculation.
v8: (agd5f): reserve enough for 2 full VM PTs, add some
additional comments.
v9: fix typo in npde calculation
Signed-off-by: Dmitry Cherkasov <Dmitrii.Cherkasov@amd.com>
Signed-off-by: Christian König <deathsimple@vodafone.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2012-09-18 01:36:19 +08:00
|
|
|
/* number of entries in page table */
/*
 * NOTE(review): expands to an expression on radeon_vm_block_size, which
 * is not defined in this part of the file -- presumably a runtime
 * variable, so this is likely not a compile-time constant. Confirm.
 */
|
2014-06-06 11:56:50 +08:00
|
|
|
#define RADEON_VM_PTE_COUNT (1 << radeon_vm_block_size)
|
drm/radeon: add 2-level VM pagetables support v9
PDE/PTE update code uses CP ring for memory writes.
All page table entries are preallocated for now in alloc_pt().
It is made as whole because it's hard to divide it to several patches
that compile and doesn't break anything being applied separately.
Tested on cayman card.
v2: rebased on top of "refactor set_page chipset interface v3",
code cleanups
v3: switched offsets calc macros to inline funcs where possible,
remove pd_addr from radeon_vm, switched RADEON_BLOCK_SIZE define,
to 9 (and PTE_COUNT to 1 << BLOCK_SIZE)
v4 (ck): move "incr" documentation to previous patch, cleanup and
document RADEON_VM_* constants, change commit message to
our usual format, simplify patch allot by removing
everything current not necessary, disable SI workaround.
v5: (agd5f): Fix typo in tables_size calculation in
radeon_vm_alloc_pt(). Second line should have been
'+=' rather than '='.
v6: fix npdes calculation. In scenario when pfns to be mapped overlap
two PDE spans:
+-----------+-------------+
| PDE span | PDE span |
+-----------+----+--------+
| |
+---------+
| pfns |
+---------+
the following npdes calculation gives incorrect result:
npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 1;
For the case above picture it should give npdes = 2, but gives one.
This patch corrects it by rounding last pfn up to 512 border,
first - down to 512 border and then subtracting and dividing by 512.
v7: Make npde calculation clearer, fix ndw calculation.
v8: (agd5f): reserve enough for 2 full VM PTs, add some
additional comments.
v9: fix typo in npde calculation
Signed-off-by: Dmitry Cherkasov <Dmitrii.Cherkasov@amd.com>
Signed-off-by: Christian König <deathsimple@vodafone.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2012-09-18 01:36:19 +08:00
|
|
|
|
2013-07-13 03:56:02 +08:00
|
|
|
/* PTBs (Page Table Blocks) need to be aligned to 32K */
|
|
|
|
#define RADEON_VM_PTB_ALIGN_SIZE 32768
|
|
|
|
/* low-bit mask for the alignment; valid because the size is a power of two */
#define RADEON_VM_PTB_ALIGN_MASK (RADEON_VM_PTB_ALIGN_SIZE - 1)
|
|
|
|
/*
 * Round a up to the next multiple of RADEON_VM_PTB_ALIGN_SIZE; a value
 * that is already aligned is returned unchanged. a is evaluated once.
 */
#define RADEON_VM_PTB_ALIGN(a) (((a) + RADEON_VM_PTB_ALIGN_MASK) & ~RADEON_VM_PTB_ALIGN_MASK)
|
|
|
|
|
2013-10-30 23:51:09 +08:00
|
|
|
/*
 * Single-bit page table entry flags (bits 0, 1, 2, 5 and 6).
 * R600_PTE_GART_MASK further down combines READABLE, WRITEABLE, SYSTEM
 * and VALID.
 */
#define R600_PTE_VALID (1 << 0)
|
|
|
|
#define R600_PTE_SYSTEM (1 << 1)
|
|
|
|
#define R600_PTE_SNOOPED (1 << 2)
|
|
|
|
#define R600_PTE_READABLE (1 << 5)
|
|
|
|
#define R600_PTE_WRITEABLE (1 << 6)
|
|
|
|
|
2014-05-10 18:17:55 +08:00
|
|
|
/* PTE (Page Table Entry) fragment field for different page sizes */
/*
 * The field starts at bit 7. For the three sizes defined here the field
 * value equals log2(fragment size / 4KB): 0 for 4KB, 4 for 64KB and
 * 6 for 256KB.
 */
|
|
|
|
#define R600_PTE_FRAG_4KB (0 << 7)
|
|
|
|
#define R600_PTE_FRAG_64KB (4 << 7)
|
|
|
|
#define R600_PTE_FRAG_256KB (6 << 7)
|
|
|
|
|
2014-07-22 23:42:20 +08:00
|
|
|
/* flags needed to be set so we can copy directly from the GART table */
|
|
|
|
#define R600_PTE_GART_MASK ( R600_PTE_READABLE | R600_PTE_WRITEABLE | \
|
|
|
|
R600_PTE_SYSTEM | R600_PTE_VALID )
|
2014-05-27 22:47:37 +08:00
|
|
|
|
2014-02-20 20:42:17 +08:00
|
|
|
/*
 * One page table: a buffer object and a 64-bit address. struct
 * radeon_vm keeps an array of these in page_tables, one per page
 * directory entry.
 * NOTE(review): addr is presumably the GPU address of bo -- confirm.
 */
struct radeon_vm_pt {
|
|
|
|
struct radeon_bo *bo;
|
|
|
|
uint64_t addr;
|
|
|
|
};
|
|
|
|
|
2014-11-19 21:01:25 +08:00
|
|
|
/*
 * Per-ring id and flush state of a VM; struct radeon_vm holds one entry
 * per ring in ids[RADEON_NUM_RINGS].
 */
struct radeon_vm_id {
|
|
|
|
/* presumably the hardware VMID assigned for this ring -- TODO confirm */
unsigned id;
|
|
|
|
/* NOTE(review): looks like the GPU address of the page directory -- confirm */
uint64_t pd_gpu_addr;
|
|
|
|
/* last flushed PD/PT update */
|
|
|
|
struct radeon_fence *flushed_updates;
|
|
|
|
/* last use of vmid */
|
|
|
|
struct radeon_fence *last_id_use;
|
|
|
|
};
|
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/*
 * One GPU virtual address space. struct radeon_fpriv (the file private
 * structure) embeds one of these, which matches the commit message's
 * "per drm client" description.
 * Holds the lock(s), the va tree, three BO status lists (invalidated,
 * freed, cleared), the page directory with its page tables, and the
 * per-ring id state.
 */
struct radeon_vm {
|
2014-11-19 21:01:26 +08:00
|
|
|
struct mutex mutex;
|
|
|
|
|
2014-11-19 21:01:25 +08:00
|
|
|
/* NOTE(review): presumably a tree of the mappings in this address space -- confirm */
struct rb_root va;
|
2012-10-09 19:31:17 +08:00
|
|
|
|
2014-11-27 21:48:44 +08:00
|
|
|
/* protecting invalidated and freed */
|
|
|
|
spinlock_t status_lock;
|
|
|
|
|
2014-07-18 15:24:53 +08:00
|
|
|
/* BOs moved, but not yet updated in the PT */
|
2014-11-19 21:01:25 +08:00
|
|
|
struct list_head invalidated;
|
2014-07-18 15:24:53 +08:00
|
|
|
|
2014-07-18 14:56:40 +08:00
|
|
|
/* BOs freed, but not yet updated in the PT */
|
2014-11-19 21:01:25 +08:00
|
|
|
struct list_head freed;
|
2014-07-18 14:56:40 +08:00
|
|
|
|
2015-05-26 18:24:15 +08:00
|
|
|
/* BOs cleared in the PT */
|
|
|
|
struct list_head cleared;
|
|
|
|
|
2012-10-09 19:31:17 +08:00
|
|
|
/* contains the page directory */
|
2014-11-19 21:01:25 +08:00
|
|
|
struct radeon_bo *page_directory;
|
|
|
|
/* presumably the highest page directory entry index in use -- TODO confirm */
unsigned max_pde_used;
|
2012-10-09 19:31:17 +08:00
|
|
|
|
|
|
|
/* array of page tables, one for each page directory entry */
|
2014-11-19 21:01:25 +08:00
|
|
|
struct radeon_vm_pt *page_tables;
|
2012-10-09 19:31:17 +08:00
|
|
|
|
2014-11-19 21:01:25 +08:00
|
|
|
struct radeon_bo_va *ib_bo_va;
|
2014-07-18 19:48:10 +08:00
|
|
|
|
2014-11-19 21:01:25 +08:00
|
|
|
/* for id and flush management per ring */
|
|
|
|
struct radeon_vm_id ids[RADEON_NUM_RINGS];
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * VM bookkeeping that is not tied to a single address space: two arrays
 * with one entry per VMID (RADEON_NUM_VM), the page limit, the number
 * of VMIDs, the VRAM base offset and the enable flag.
 */
struct radeon_vm_manager {
|
2012-08-09 22:21:08 +08:00
|
|
|
/* one fence pointer per VMID; presumably the last fence that used that VMID -- TODO confirm */
struct radeon_fence *active[RADEON_NUM_VM];
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* maximum number of pages (the v8 note in the commit message above renames vm->size to this) */
uint32_t max_pfn;
|
|
|
|
/* number of VMIDs */
|
|
|
|
unsigned nvm;
|
|
|
|
/* vram base address for page table entry */
|
|
|
|
u64 vram_base_offset;
|
2012-01-06 22:38:15 +08:00
|
|
|
/* is vm enabled? */
|
|
|
|
bool enabled;
|
2014-08-26 20:45:54 +08:00
|
|
|
/* for hw to save the PD addr on suspend/resume */
|
|
|
|
uint32_t saved_table_addr[RADEON_NUM_VM];
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* file private structure
|
|
|
|
*/
|
|
|
|
struct radeon_fpriv {
|
|
|
|
struct radeon_vm vm;
|
|
|
|
};
|
|
|
|
|
2009-12-02 02:43:46 +08:00
|
|
|
/*
|
|
|
|
* R6xx+ IH ring
|
|
|
|
*/
|
|
|
|
struct r600_ih {
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *ring_obj;
|
2009-12-02 02:43:46 +08:00
|
|
|
volatile uint32_t *ring;
|
|
|
|
unsigned rptr;
|
|
|
|
unsigned ring_size;
|
|
|
|
uint64_t gpu_addr;
|
|
|
|
uint32_t ptr_mask;
|
2012-05-17 03:45:24 +08:00
|
|
|
atomic_t lock;
|
2009-12-02 02:43:46 +08:00
|
|
|
bool enabled;
|
|
|
|
};
|
|
|
|
|
2012-03-21 05:18:21 +08:00
|
|
|
/*
|
2013-04-13 01:52:52 +08:00
|
|
|
* RLC stuff
|
2012-03-21 05:18:21 +08:00
|
|
|
*/
|
2013-04-13 01:52:52 +08:00
|
|
|
#include "clearstate_defs.h"
|
|
|
|
|
|
|
|
struct radeon_rlc {
|
2012-03-21 05:18:21 +08:00
|
|
|
/* for power gating */
|
|
|
|
struct radeon_bo *save_restore_obj;
|
|
|
|
uint64_t save_restore_gpu_addr;
|
2013-04-13 01:52:52 +08:00
|
|
|
volatile uint32_t *sr_ptr;
|
2013-04-18 05:53:50 +08:00
|
|
|
const u32 *reg_list;
|
2013-04-13 01:52:52 +08:00
|
|
|
u32 reg_list_size;
|
2012-03-21 05:18:21 +08:00
|
|
|
/* for clear state */
|
|
|
|
struct radeon_bo *clear_state_obj;
|
|
|
|
uint64_t clear_state_gpu_addr;
|
2013-04-13 01:52:52 +08:00
|
|
|
volatile uint32_t *cs_ptr;
|
2013-04-18 05:53:50 +08:00
|
|
|
const struct cs_section_def *cs_data;
|
2013-07-23 21:41:05 +08:00
|
|
|
u32 clear_state_size;
|
|
|
|
/* for cp tables */
|
|
|
|
struct radeon_bo *cp_table_obj;
|
|
|
|
uint64_t cp_table_gpu_addr;
|
|
|
|
volatile uint32_t *cp_table_ptr;
|
|
|
|
u32 cp_table_size;
|
2012-03-21 05:18:21 +08:00
|
|
|
};
|
|
|
|
|
2011-12-22 01:13:46 +08:00
|
|
|
int radeon_ib_get(struct radeon_device *rdev, int ring,
|
2012-08-07 00:57:44 +08:00
|
|
|
struct radeon_ib *ib, struct radeon_vm *vm,
|
|
|
|
unsigned size);
|
2012-05-09 21:35:02 +08:00
|
|
|
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
|
2012-07-13 19:06:00 +08:00
|
|
|
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
|
2014-08-18 16:34:55 +08:00
|
|
|
struct radeon_ib *const_ib, bool hdp_flush);
|
2009-06-05 20:42:42 +08:00
|
|
|
int radeon_ib_pool_init(struct radeon_device *rdev);
|
|
|
|
void radeon_ib_pool_fini(struct radeon_device *rdev);
|
2012-05-02 21:11:12 +08:00
|
|
|
int radeon_ib_ring_tests(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
/* Ring access between begin & end cannot sleep */
|
2012-07-18 02:02:31 +08:00
|
|
|
bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev,
|
|
|
|
struct radeon_ring *ring);
|
2011-10-23 18:56:27 +08:00
|
|
|
void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
|
|
|
|
int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
|
2014-08-18 16:34:55 +08:00
|
|
|
void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp,
|
|
|
|
bool hdp_flush);
|
|
|
|
void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp,
|
|
|
|
bool hdp_flush);
|
2012-05-09 21:34:45 +08:00
|
|
|
void radeon_ring_undo(struct radeon_ring *ring);
|
2011-10-23 18:56:27 +08:00
|
|
|
void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
|
2014-02-18 21:52:33 +08:00
|
|
|
void radeon_ring_lockup_update(struct radeon_device *rdev,
|
|
|
|
struct radeon_ring *ring);
|
2012-05-02 21:11:20 +08:00
|
|
|
bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
|
2012-07-09 17:52:44 +08:00
|
|
|
unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring,
|
|
|
|
uint32_t **data);
|
|
|
|
int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring,
|
|
|
|
unsigned size, uint32_t *data);
|
2011-10-23 18:56:27 +08:00
|
|
|
int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size,
|
2013-12-10 08:44:30 +08:00
|
|
|
unsigned rptr_offs, u32 nop);
|
2011-10-23 18:56:27 +08:00
|
|
|
void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
|
2012-09-28 03:08:35 +08:00
|
|
|
/* r600 async dma */
|
|
|
|
void r600_dma_stop(struct radeon_device *rdev);
|
|
|
|
int r600_dma_resume(struct radeon_device *rdev);
|
|
|
|
void r600_dma_fini(struct radeon_device *rdev);
|
|
|
|
|
2012-12-05 04:28:18 +08:00
|
|
|
void cayman_dma_stop(struct radeon_device *rdev);
|
|
|
|
int cayman_dma_resume(struct radeon_device *rdev);
|
|
|
|
void cayman_dma_fini(struct radeon_device *rdev);
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* CS.
|
|
|
|
*/
|
|
|
|
struct radeon_cs_chunk {
|
|
|
|
uint32_t length_dw;
|
|
|
|
uint32_t *kdata;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
void __user *user_ptr;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_cs_parser {
|
2010-01-18 20:01:36 +08:00
|
|
|
struct device *dev;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_device *rdev;
|
|
|
|
struct drm_file *filp;
|
|
|
|
/* chunks */
|
|
|
|
unsigned nchunks;
|
|
|
|
struct radeon_cs_chunk *chunks;
|
|
|
|
uint64_t *chunks_array;
|
|
|
|
/* IB */
|
|
|
|
unsigned idx;
|
|
|
|
/* relocations */
|
|
|
|
unsigned nrelocs;
|
2014-11-27 21:48:42 +08:00
|
|
|
struct radeon_bo_list *relocs;
|
|
|
|
struct radeon_bo_list *vm_bos;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct list_head validated;
|
2011-11-18 23:19:47 +08:00
|
|
|
unsigned dma_reloc_idx;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* pointers to the various chunks (IB, relocs, flags, const IB) */
|
2014-12-03 22:53:24 +08:00
|
|
|
struct radeon_cs_chunk *chunk_ib;
|
|
|
|
struct radeon_cs_chunk *chunk_relocs;
|
|
|
|
struct radeon_cs_chunk *chunk_flags;
|
|
|
|
struct radeon_cs_chunk *chunk_const_ib;
|
2012-05-09 21:35:02 +08:00
|
|
|
struct radeon_ib ib;
|
|
|
|
struct radeon_ib const_ib;
|
2009-06-05 20:42:42 +08:00
|
|
|
void *track;
|
2009-09-08 08:10:24 +08:00
|
|
|
unsigned family;
|
2011-10-25 07:38:45 +08:00
|
|
|
int parser_error;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
u32 cs_flags;
|
|
|
|
u32 ring;
|
|
|
|
s32 priority;
|
2013-06-27 19:48:17 +08:00
|
|
|
struct ww_acquire_ctx ticket;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
drm/radeon: fixup locking inversion between, mmap_sem and reservations
op 08-10-13 18:58, Thomas Hellstrom schreef:
> On 10/08/2013 06:47 PM, Jerome Glisse wrote:
>> On Tue, Oct 08, 2013 at 06:29:35PM +0200, Thomas Hellstrom wrote:
>>> On 10/08/2013 04:55 PM, Jerome Glisse wrote:
>>>> On Tue, Oct 08, 2013 at 04:45:18PM +0200, Christian König wrote:
>>>>> Am 08.10.2013 16:33, schrieb Jerome Glisse:
>>>>>> On Tue, Oct 08, 2013 at 04:14:40PM +0200, Maarten Lankhorst wrote:
>>>>>>> Allocate and copy all kernel memory before doing reservations. This prevents a locking
>>>>>>> inversion between mmap_sem and reservation_class, and allows us to drop the trylocking
>>>>>>> in ttm_bo_vm_fault without upsetting lockdep.
>>>>>>>
>>>>>>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
>>>>>> I would say NAK. Current code only allocate temporary page in AGP case.
>>>>>> So AGP case is userspace -> temp page -> cs checker -> radeon ib.
>>>>>>
>>>>>> Non AGP is directly memcpy to radeon IB.
>>>>>>
>>>>>> Your patch allocate memory memcpy userspace to it and it will then be
>>>>>> memcpy to IB. Which means you introduce an extra memcpy in the process
>>>>>> not something we want.
>>>>> Totally agree. Additional to that there is no good reason to provide
>>>>> anything else than anonymous system memory to the CS ioctl, so the
>>>>> dependency between the mmap_sem and reservations are not really
>>>>> clear to me.
>>>>>
>>>>> Christian.
>>>> I think is that in other code path you take mmap_sem first then reserve
>>>> bo. But here we reserve bo and then we take mmap_sem because of copy
>>> >from user.
>>>> Cheers,
>>>> Jerome
>>>>
>>> Actually the log message is a little confusing. I think the mmap_sem
>>> locking inversion problem is orthogonal to what's being fixed here.
>>>
>>> This patch fixes the possible recursive bo::reserve caused by
>>> malicious user-space handing a pointer to ttm memory so that the ttm
>>> fault handler is called when bos are already reserved. That may
>>> cause a (possibly interruptible) livelock.
>>>
>>> Once that is fixed, we are free to choose the mmap_sem ->
>>> bo::reserve locking order. Currently it's bo::reserve->mmap_sem(),
>>> but the hack required in the ttm fault handler is admittedly a bit
>>> ugly. The plan is to change the locking order to
>>> mmap_sem->bo::reserve
>>>
>>> I'm not sure if it applies to this particular case, but it should be
>>> possible to make sure that copy_from_user_inatomic() will always
>>> succeed, by making sure the pages are present using
>>> get_user_pages(), and release the pages after
>>> copy_from_user_inatomic() is done. That way there's no need for a
>>> double memcpy slowpath, but if the copied data is very fragmented I
>>> guess the resulting code may look ugly. The get_user_pages()
>>> function will return an error if it hits TTM pages.
>>>
>>> /Thomas
>> get_user_pages + copy_from_user_inatomic is overkill. We should just
>> do get_user_pages which fails with ttm memory and then use copy_highpage
>> helper.
>>
>> Cheers,
>> Jerome
> Yeah, it may well be that that's the preferred solution.
>
> /Thomas
>
I still disagree, and shuffled radeon_ib_get around to be called sooner.
How does the patch below look?
8<-------
Allocate and copy all kernel memory before doing reservations. This prevents a locking
inversion between mmap_sem and reservation_class, and allows us to drop the trylocking
in ttm_bo_vm_fault without upsetting lockdep.
Changes since v1:
- Kill extra memcpy for !AGP case.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2013-10-09 20:36:57 +08:00
|
|
|
static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
|
|
|
|
{
|
2014-12-03 22:53:24 +08:00
|
|
|
struct radeon_cs_chunk *ibc = p->chunk_ib;
|
drm/radeon: fixup locking inversion between, mmap_sem and reservations
op 08-10-13 18:58, Thomas Hellstrom schreef:
> On 10/08/2013 06:47 PM, Jerome Glisse wrote:
>> On Tue, Oct 08, 2013 at 06:29:35PM +0200, Thomas Hellstrom wrote:
>>> On 10/08/2013 04:55 PM, Jerome Glisse wrote:
>>>> On Tue, Oct 08, 2013 at 04:45:18PM +0200, Christian König wrote:
>>>>> Am 08.10.2013 16:33, schrieb Jerome Glisse:
>>>>>> On Tue, Oct 08, 2013 at 04:14:40PM +0200, Maarten Lankhorst wrote:
>>>>>>> Allocate and copy all kernel memory before doing reservations. This prevents a locking
>>>>>>> inversion between mmap_sem and reservation_class, and allows us to drop the trylocking
>>>>>>> in ttm_bo_vm_fault without upsetting lockdep.
>>>>>>>
>>>>>>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
>>>>>> I would say NAK. Current code only allocate temporary page in AGP case.
>>>>>> So AGP case is userspace -> temp page -> cs checker -> radeon ib.
>>>>>>
>>>>>> Non AGP is directly memcpy to radeon IB.
>>>>>>
>>>>>> Your patch allocate memory memcpy userspace to it and it will then be
>>>>>> memcpy to IB. Which means you introduce an extra memcpy in the process
>>>>>> not something we want.
>>>>> Totally agree. Additional to that there is no good reason to provide
>>>>> anything else than anonymous system memory to the CS ioctl, so the
>>>>> dependency between the mmap_sem and reservations are not really
>>>>> clear to me.
>>>>>
>>>>> Christian.
>>>> I think is that in other code path you take mmap_sem first then reserve
>>>> bo. But here we reserve bo and then we take mmap_sem because of copy
>>> >from user.
>>>> Cheers,
>>>> Jerome
>>>>
>>> Actually the log message is a little confusing. I think the mmap_sem
>>> locking inversion problem is orthogonal to what's being fixed here.
>>>
>>> This patch fixes the possible recursive bo::reserve caused by
>>> malicious user-space handing a pointer to ttm memory so that the ttm
>>> fault handler is called when bos are already reserved. That may
>>> cause a (possibly interruptible) livelock.
>>>
>>> Once that is fixed, we are free to choose the mmap_sem ->
>>> bo::reserve locking order. Currently it's bo::reserve->mmap_sem(),
>>> but the hack required in the ttm fault handler is admittedly a bit
>>> ugly. The plan is to change the locking order to
>>> mmap_sem->bo::reserve
>>>
>>> I'm not sure if it applies to this particular case, but it should be
>>> possible to make sure that copy_from_user_inatomic() will always
>>> succeed, by making sure the pages are present using
>>> get_user_pages(), and release the pages after
>>> copy_from_user_inatomic() is done. That way there's no need for a
>>> double memcpy slowpath, but if the copied data is very fragmented I
>>> guess the resulting code may look ugly. The get_user_pages()
>>> function will return an error if it hits TTM pages.
>>>
>>> /Thomas
>> get_user_pages + copy_from_user_inatomic is overkill. We should just
>> do get_user_pages which fails with ttm memory and then use copy_highpage
>> helper.
>>
>> Cheers,
>> Jerome
> Yeah, it may well be that that's the preferred solution.
>
> /Thomas
>
I still disagree, and shuffled radeon_ib_get around to be called sooner.
How does the patch below look?
8<-------
Allocate and copy all kernel memory before doing reservations. This prevents a locking
inversion between mmap_sem and reservation_class, and allows us to drop the trylocking
in ttm_bo_vm_fault without upsetting lockdep.
Changes since v1:
- Kill extra memcpy for !AGP case.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2013-10-09 20:36:57 +08:00
|
|
|
|
|
|
|
if (ibc->kdata)
|
|
|
|
return ibc->kdata[idx];
|
|
|
|
return p->ib.ptr[idx];
|
|
|
|
}
|
|
|
|
|
2009-09-23 14:56:27 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_cs_packet {
|
|
|
|
unsigned idx;
|
|
|
|
unsigned type;
|
|
|
|
unsigned reg;
|
|
|
|
unsigned opcode;
|
|
|
|
int count;
|
|
|
|
unsigned one_reg_wr;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Callback type for checking a type-0 packet: receives the CS parser, the
 * packet being examined, and an (idx, reg) pair.
 * NOTE(review): presumably returns 0 on success and a negative errno on
 * failure - confirm against the implementations.
 */
typedef int (*radeon_packet0_check_t)(struct radeon_cs_parser *p,
|
|
|
|
struct radeon_cs_packet *pkt,
|
|
|
|
unsigned idx, unsigned reg);
|
|
|
|
/*
 * Callback type for checking a type-3 packet: receives only the CS parser
 * and the packet being examined.
 * NOTE(review): return convention presumably matches
 * radeon_packet0_check_t - confirm.
 */
typedef int (*radeon_packet3_check_t)(struct radeon_cs_parser *p,
|
|
|
|
struct radeon_cs_packet *pkt);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* AGP
|
|
|
|
*/
|
|
|
|
int radeon_agp_init(struct radeon_device *rdev);
|
2009-11-05 13:39:10 +08:00
|
|
|
void radeon_agp_resume(struct radeon_device *rdev);
|
2010-05-22 00:48:54 +08:00
|
|
|
void radeon_agp_suspend(struct radeon_device *rdev);
|
2009-06-05 20:42:42 +08:00
|
|
|
void radeon_agp_fini(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Writeback
|
|
|
|
*/
|
|
|
|
struct radeon_wb {
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *wb_obj;
|
2009-06-05 20:42:42 +08:00
|
|
|
volatile uint32_t *wb;
|
|
|
|
uint64_t gpu_addr;
|
2010-08-28 06:25:25 +08:00
|
|
|
bool enabled;
|
2010-09-04 17:04:34 +08:00
|
|
|
bool use_event;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2010-08-28 06:25:25 +08:00
|
|
|
/*
 * Offsets of the individual writeback slots.
 * Every slot up to CIK_WB_CP2_WPTR_OFFSET sits at a multiple of 256; the two
 * DMA ring-test slots follow it at +4 and +8.
 * NOTE(review): presumably byte offsets into the buffer mapped at
 * radeon_wb.wb - confirm the units against the users of these macros.
 */
#define RADEON_WB_SCRATCH_OFFSET 0
|
2012-07-18 02:02:31 +08:00
|
|
|
#define RADEON_WB_RING0_NEXT_RPTR 256
|
2010-08-28 06:25:25 +08:00
|
|
|
#define RADEON_WB_CP_RPTR_OFFSET 1024
|
2011-03-03 09:07:31 +08:00
|
|
|
#define RADEON_WB_CP1_RPTR_OFFSET 1280
|
|
|
|
#define RADEON_WB_CP2_RPTR_OFFSET 1536
|
2012-09-28 03:08:35 +08:00
|
|
|
#define R600_WB_DMA_RPTR_OFFSET 1792
|
2010-08-28 06:25:25 +08:00
|
|
|
#define R600_WB_IH_WPTR_OFFSET 2048
|
2012-12-05 04:27:33 +08:00
|
|
|
#define CAYMAN_WB_DMA1_RPTR_OFFSET 2304
|
2010-09-04 17:04:34 +08:00
|
|
|
#define R600_WB_EVENT_OFFSET 3072
|
2013-06-27 05:37:11 +08:00
|
|
|
#define CIK_WB_CP1_WPTR_OFFSET 3328
|
|
|
|
#define CIK_WB_CP2_WPTR_OFFSET 3584
|
2014-10-14 01:20:02 +08:00
|
|
|
#define R600_WB_DMA_RING_TEST_OFFSET 3588
|
|
|
|
#define CAYMAN_WB_DMA1_RING_TEST_OFFSET 3592
|
2010-08-28 06:25:25 +08:00
|
|
|
|
2009-07-14 03:04:08 +08:00
|
|
|
/**
|
|
|
|
* struct radeon_pm - power management data
|
|
|
|
* @max_bandwidth: maximum bandwidth the gpu has (MByte/s)
|
|
|
|
* @igp_sideport_mclk: sideport memory clock Mhz (rs690,rs740,rs780,rs880)
|
|
|
|
* @igp_system_mclk: system clock Mhz (rs690,rs740,rs780,rs880)
|
|
|
|
* @igp_ht_link_clk: ht link clock Mhz (rs690,rs740,rs780,rs880)
|
|
|
|
* @igp_ht_link_width: ht link width in bits (rs690,rs740,rs780,rs880)
|
|
|
|
* @k8_bandwidth: k8 bandwidth the gpu has (MByte/s) (IGP)
|
|
|
|
* @sideport_bandwidth: sideport bandwidth the gpu has (MByte/s) (IGP)
|
|
|
|
* @ht_bandwidth: ht bandwidth the gpu has (MByte/s) (IGP)
|
|
|
|
* @core_bandwidth: core GPU bandwidth the gpu has (MByte/s) (IGP)
|
2011-03-31 09:57:33 +08:00
|
|
|
* @sclk: GPU clock Mhz (core bandwidth depends on this clock)
|
2009-07-14 03:04:08 +08:00
|
|
|
* @needed_bandwidth: current bandwidth needs
|
|
|
|
*
|
|
|
|
* It keeps track of various data needed to make power management decisions.
|
2011-03-31 09:57:33 +08:00
|
|
|
* Bandwidth need is used to determine the minimum clock of the GPU and memory.
|
2009-07-14 03:04:08 +08:00
|
|
|
* Equation between gpu/memory clock and available bandwidth is hw dependent
|
|
|
|
* (type of memory, bus size, efficiency, ...)
|
|
|
|
*/
|
2010-05-08 03:10:16 +08:00
|
|
|
|
|
|
|
enum radeon_pm_method {
|
|
|
|
PM_METHOD_PROFILE,
|
|
|
|
PM_METHOD_DYNPM,
|
2013-04-13 01:55:22 +08:00
|
|
|
PM_METHOD_DPM,
|
2010-05-08 03:10:16 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
enum radeon_dynpm_state {
|
|
|
|
DYNPM_STATE_DISABLED,
|
|
|
|
DYNPM_STATE_MINIMUM,
|
|
|
|
DYNPM_STATE_PAUSED,
|
DRM / radeon / KMS: Fix hibernation regression related to radeon PM (was: Re: [Regression, post-2.6.34] Hibernation broken on machines with radeon/KMS and r300)
There is a regression from 2.6.34 related to the recent radeon power
management changes, caused by attempting to cancel a delayed work
item that's never been scheduled. However, the code as is has some
other issues potentially leading to visible problems.
First, the mutex around cancel_delayed_work() in radeon_pm_suspend()
doesn't really serve any purpose, because cancel_delayed_work() only
tries to delete the work's timer. Moreover, it doesn't prevent the
work handler from running, so the handler can do some wrong things if
it wins the race and in that case it will rearm itself to do some
more wrong things going forward. So, I think it's better to wait for
the handler to return in case it's already been queued up for
execution. Also, it should be prevented from rearming itself in that
case.
Second, in radeon_set_pm_method() the cancel_delayed_work() is not
sufficient to prevent the work handler from running and queing up
itself for the next run (the failure scenario is that
cancel_delayed_work() returns 0, so the handler is run, it waits on
the mutex and then rearms itself after the mutex has been released),
so again the work handler should be prevented from rearming itself in
that case..
Finally, there's a potential deadlock in radeon_pm_fini(), because
cancel_delayed_work_sync() is called under rdev->pm.mutex, but the
work handler tries to acquire the same mutex (if it wins the race).
Fix the issues described above.
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Reviewed-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-06-18 07:02:27 +08:00
|
|
|
DYNPM_STATE_ACTIVE,
|
|
|
|
DYNPM_STATE_SUSPENDED,
|
2009-12-23 06:02:16 +08:00
|
|
|
};
|
2010-05-08 03:10:16 +08:00
|
|
|
enum radeon_dynpm_action {
|
|
|
|
DYNPM_ACTION_NONE,
|
|
|
|
DYNPM_ACTION_MINIMUM,
|
|
|
|
DYNPM_ACTION_DOWNCLOCK,
|
|
|
|
DYNPM_ACTION_UPCLOCK,
|
|
|
|
DYNPM_ACTION_DEFAULT
|
2009-12-23 06:02:16 +08:00
|
|
|
};
|
2009-12-29 02:58:44 +08:00
|
|
|
|
|
|
|
enum radeon_voltage_type {
|
|
|
|
VOLTAGE_NONE = 0,
|
|
|
|
VOLTAGE_GPIO,
|
|
|
|
VOLTAGE_VDDC,
|
|
|
|
VOLTAGE_SW
|
|
|
|
};
|
|
|
|
|
2009-12-24 02:21:58 +08:00
|
|
|
enum radeon_pm_state_type {
|
2013-04-13 01:55:22 +08:00
|
|
|
/* not used for dpm */
|
2009-12-24 02:21:58 +08:00
|
|
|
POWER_STATE_TYPE_DEFAULT,
|
|
|
|
POWER_STATE_TYPE_POWERSAVE,
|
2013-04-13 01:55:22 +08:00
|
|
|
/* user selectable states */
|
2009-12-24 02:21:58 +08:00
|
|
|
POWER_STATE_TYPE_BATTERY,
|
|
|
|
POWER_STATE_TYPE_BALANCED,
|
|
|
|
POWER_STATE_TYPE_PERFORMANCE,
|
2013-04-13 01:55:22 +08:00
|
|
|
/* internal states */
|
|
|
|
POWER_STATE_TYPE_INTERNAL_UVD,
|
|
|
|
POWER_STATE_TYPE_INTERNAL_UVD_SD,
|
|
|
|
POWER_STATE_TYPE_INTERNAL_UVD_HD,
|
|
|
|
POWER_STATE_TYPE_INTERNAL_UVD_HD2,
|
|
|
|
POWER_STATE_TYPE_INTERNAL_UVD_MVC,
|
|
|
|
POWER_STATE_TYPE_INTERNAL_BOOT,
|
|
|
|
POWER_STATE_TYPE_INTERNAL_THERMAL,
|
|
|
|
POWER_STATE_TYPE_INTERNAL_ACPI,
|
|
|
|
POWER_STATE_TYPE_INTERNAL_ULV,
|
2013-07-05 23:48:31 +08:00
|
|
|
POWER_STATE_TYPE_INTERNAL_3DPERF,
|
2009-12-24 02:21:58 +08:00
|
|
|
};
|
|
|
|
|
2010-05-08 03:10:16 +08:00
|
|
|
enum radeon_pm_profile_type {
|
|
|
|
PM_PROFILE_DEFAULT,
|
|
|
|
PM_PROFILE_AUTO,
|
|
|
|
PM_PROFILE_LOW,
|
2010-06-03 05:56:01 +08:00
|
|
|
PM_PROFILE_MID,
|
2010-05-08 03:10:16 +08:00
|
|
|
PM_PROFILE_HIGH,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Profile slot indices: one default slot followed by LOW/MID/HIGH slots in
 * an SH and an MH variant. PM_PROFILE_MAX is one past the highest index
 * (6), i.e. the number of slots.
 * NOTE(review): SH/MH presumably stand for single-head/multi-head - confirm.
 */
#define PM_PROFILE_DEFAULT_IDX 0
|
|
|
|
#define PM_PROFILE_LOW_SH_IDX 1
|
2010-06-03 05:56:01 +08:00
|
|
|
#define PM_PROFILE_MID_SH_IDX 2
|
|
|
|
#define PM_PROFILE_HIGH_SH_IDX 3
|
|
|
|
#define PM_PROFILE_LOW_MH_IDX 4
|
|
|
|
#define PM_PROFILE_MID_MH_IDX 5
|
|
|
|
#define PM_PROFILE_HIGH_MH_IDX 6
|
|
|
|
#define PM_PROFILE_MAX 7
|
2010-05-08 03:10:16 +08:00
|
|
|
|
|
|
|
struct radeon_pm_profile {
|
|
|
|
int dpms_off_ps_idx;
|
|
|
|
int dpms_on_ps_idx;
|
|
|
|
int dpms_off_cm_idx;
|
|
|
|
int dpms_on_cm_idx;
|
2009-12-24 03:28:05 +08:00
|
|
|
};
|
|
|
|
|
2010-07-03 00:58:16 +08:00
|
|
|
enum radeon_int_thermal_type {
|
|
|
|
THERMAL_TYPE_NONE,
|
2013-04-13 01:55:22 +08:00
|
|
|
THERMAL_TYPE_EXTERNAL,
|
|
|
|
THERMAL_TYPE_EXTERNAL_GPIO,
|
2010-07-03 00:58:16 +08:00
|
|
|
THERMAL_TYPE_RV6XX,
|
|
|
|
THERMAL_TYPE_RV770,
|
2013-04-13 01:55:22 +08:00
|
|
|
THERMAL_TYPE_ADT7473_WITH_INTERNAL,
|
2010-07-03 00:58:16 +08:00
|
|
|
THERMAL_TYPE_EVERGREEN,
|
2010-11-23 06:56:32 +08:00
|
|
|
THERMAL_TYPE_SUMO,
|
2011-01-07 10:19:22 +08:00
|
|
|
THERMAL_TYPE_NI,
|
2012-03-21 05:18:09 +08:00
|
|
|
THERMAL_TYPE_SI,
|
2013-04-13 01:55:22 +08:00
|
|
|
THERMAL_TYPE_EMC2103_WITH_INTERNAL,
|
2012-12-19 11:07:14 +08:00
|
|
|
THERMAL_TYPE_CI,
|
2013-04-23 09:41:26 +08:00
|
|
|
THERMAL_TYPE_KV,
|
2010-07-03 00:58:16 +08:00
|
|
|
};
|
|
|
|
|
2009-12-29 02:58:44 +08:00
|
|
|
struct radeon_voltage {
|
|
|
|
enum radeon_voltage_type type;
|
|
|
|
/* gpio voltage */
|
|
|
|
struct radeon_gpio_rec gpio;
|
|
|
|
u32 delay; /* delay in usec from voltage drop to sclk change */
|
|
|
|
bool active_high; /* voltage drop is active when bit is high */
|
|
|
|
/* VDDC voltage */
|
|
|
|
u8 vddc_id; /* index into vddc voltage table */
|
|
|
|
u8 vddci_id; /* index into vddci voltage table */
|
|
|
|
bool vddci_enabled;
|
|
|
|
/* r6xx+ sw */
|
2011-04-13 02:49:24 +08:00
|
|
|
u16 voltage;
|
|
|
|
/* evergreen+ vddci */
|
|
|
|
u16 vddci;
|
2009-12-29 02:58:44 +08:00
|
|
|
};
|
|
|
|
|
2010-05-03 13:13:14 +08:00
|
|
|
/* clock mode flags */
|
|
|
|
#define RADEON_PM_MODE_NO_DISPLAY (1 << 0)
|
|
|
|
|
2009-12-29 02:58:44 +08:00
|
|
|
struct radeon_pm_clock_info {
|
|
|
|
/* memory clock */
|
|
|
|
u32 mclk;
|
|
|
|
/* engine clock */
|
|
|
|
u32 sclk;
|
|
|
|
/* voltage info */
|
|
|
|
struct radeon_voltage voltage;
|
2010-05-03 13:13:14 +08:00
|
|
|
/* standardized clock flags */
|
2009-12-29 02:58:44 +08:00
|
|
|
u32 flags;
|
|
|
|
};
|
|
|
|
|
2010-04-23 02:03:55 +08:00
|
|
|
/* state flags */
|
2010-05-03 13:13:14 +08:00
|
|
|
#define RADEON_PM_STATE_SINGLE_DISPLAY_ONLY (1 << 0)
|
2010-04-23 02:03:55 +08:00
|
|
|
|
2009-12-29 02:58:44 +08:00
|
|
|
struct radeon_power_state {
|
2009-12-24 02:21:58 +08:00
|
|
|
enum radeon_pm_state_type type;
|
2011-11-04 22:09:43 +08:00
|
|
|
struct radeon_pm_clock_info *clock_info;
|
2009-12-29 02:58:44 +08:00
|
|
|
/* number of valid clock modes in this power state */
|
|
|
|
int num_clock_modes;
|
|
|
|
struct radeon_pm_clock_info *default_clock_mode;
|
2010-04-23 02:03:55 +08:00
|
|
|
/* standardized state flags */
|
|
|
|
u32 flags;
|
2010-04-23 02:25:19 +08:00
|
|
|
u32 misc; /* vbios specific flags */
|
|
|
|
u32 misc2; /* vbios specific flags */
|
|
|
|
int pcie_lanes; /* pcie lanes */
|
2009-12-29 02:58:44 +08:00
|
|
|
};
|
|
|
|
|
2010-02-12 06:16:36 +08:00
|
|
|
/*
|
|
|
|
* Some modes are overclocked by a very small amount; accept them
|
|
|
|
*/
|
|
|
|
#define RADEON_MODE_OVERCLOCK_MARGIN 500 /* 5 MHz */
|
|
|
|
|
2013-04-13 01:58:03 +08:00
|
|
|
enum radeon_dpm_auto_throttle_src {
|
|
|
|
RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL,
|
|
|
|
RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL
|
|
|
|
};
|
|
|
|
|
|
|
|
enum radeon_dpm_event_src {
|
|
|
|
RADEON_DPM_EVENT_SRC_ANALOG = 0,
|
|
|
|
RADEON_DPM_EVENT_SRC_EXTERNAL = 1,
|
|
|
|
RADEON_DPM_EVENT_SRC_DIGITAL = 2,
|
|
|
|
RADEON_DPM_EVENT_SRC_ANALOG_OR_EXTERNAL = 3,
|
|
|
|
RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4
|
|
|
|
};
|
|
|
|
|
2013-09-05 04:13:56 +08:00
|
|
|
/* size of the vce_states[] array in struct radeon_dpm */
#define RADEON_MAX_VCE_LEVELS 6
|
|
|
|
|
2013-08-21 08:29:05 +08:00
|
|
|
/* VCE levels, distinguished by power source (AC/DC), queue and resolution. */
enum radeon_vce_level {
	RADEON_VCE_LEVEL_AC_ALL = 0,     /* AC, All cases */
	RADEON_VCE_LEVEL_DC_EE = 1,      /* DC, entropy encoding */
	RADEON_VCE_LEVEL_DC_LL_LOW = 2,  /* DC, low latency queue, res <= 720 */
	RADEON_VCE_LEVEL_DC_LL_HIGH = 3, /* DC, low latency queue, 1080 >= res > 720 */
	RADEON_VCE_LEVEL_DC_GP_LOW = 4,  /* DC, general purpose queue, res <= 720 */
	RADEON_VCE_LEVEL_DC_GP_HIGH = 5, /* DC, general purpose queue, 1080 >= res > 720 */
};
|
|
|
|
|
2013-04-13 01:55:22 +08:00
|
|
|
struct radeon_ps {
|
|
|
|
u32 caps; /* vbios flags */
|
|
|
|
u32 class; /* vbios flags */
|
|
|
|
u32 class2; /* vbios flags */
|
|
|
|
/* UVD clocks */
|
|
|
|
u32 vclk;
|
|
|
|
u32 dclk;
|
2013-05-16 03:53:57 +08:00
|
|
|
/* VCE clocks */
|
|
|
|
u32 evclk;
|
|
|
|
u32 ecclk;
|
2013-08-21 08:29:05 +08:00
|
|
|
bool vce_active;
|
|
|
|
enum radeon_vce_level vce_level;
|
2013-04-13 01:55:22 +08:00
|
|
|
/* asic priv */
|
|
|
|
void *ps_priv;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_dpm_thermal {
|
|
|
|
/* thermal interrupt work */
|
|
|
|
struct work_struct work;
|
|
|
|
/* low temperature threshold */
|
|
|
|
int min_temp;
|
|
|
|
/* high temperature threshold */
|
|
|
|
int max_temp;
|
|
|
|
/* was interrupt low to high or high to low */
|
|
|
|
bool high_to_low;
|
|
|
|
};
|
|
|
|
|
2012-11-30 08:27:56 +08:00
|
|
|
/* Action attached to a blacklisted clock pair; numbering starts at 1. */
enum radeon_clk_action {
	RADEON_SCLK_UP = 1,
	RADEON_SCLK_DOWN
};
|
|
|
|
|
|
|
|
struct radeon_blacklist_clocks
|
|
|
|
{
|
|
|
|
u32 sclk;
|
|
|
|
u32 mclk;
|
|
|
|
enum radeon_clk_action action;
|
|
|
|
};
|
|
|
|
|
2012-11-15 08:57:42 +08:00
|
|
|
struct radeon_clock_and_voltage_limits {
|
|
|
|
u32 sclk;
|
|
|
|
u32 mclk;
|
2013-10-24 04:13:42 +08:00
|
|
|
u16 vddc;
|
|
|
|
u16 vddci;
|
2012-11-15 08:57:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_clock_array {
|
|
|
|
u32 count;
|
|
|
|
u32 *values;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_clock_voltage_dependency_entry {
|
|
|
|
u32 clk;
|
|
|
|
u16 v;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_clock_voltage_dependency_table {
|
|
|
|
u32 count;
|
|
|
|
struct radeon_clock_voltage_dependency_entry *entries;
|
|
|
|
};
|
|
|
|
|
2013-05-06 23:31:04 +08:00
|
|
|
union radeon_cac_leakage_entry {
|
|
|
|
struct {
|
|
|
|
u16 vddc;
|
|
|
|
u32 leakage;
|
|
|
|
};
|
|
|
|
struct {
|
|
|
|
u16 vddc1;
|
|
|
|
u16 vddc2;
|
|
|
|
u16 vddc3;
|
|
|
|
};
|
2012-11-15 08:57:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_cac_leakage_table {
|
|
|
|
u32 count;
|
2013-05-06 23:31:04 +08:00
|
|
|
union radeon_cac_leakage_entry *entries;
|
2012-11-15 08:57:42 +08:00
|
|
|
};
|
|
|
|
|
2013-03-21 00:30:25 +08:00
|
|
|
struct radeon_phase_shedding_limits_entry {
|
|
|
|
u16 voltage;
|
|
|
|
u32 sclk;
|
|
|
|
u32 mclk;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_phase_shedding_limits_table {
|
|
|
|
u32 count;
|
|
|
|
struct radeon_phase_shedding_limits_entry *entries;
|
|
|
|
};
|
|
|
|
|
2013-04-20 07:11:37 +08:00
|
|
|
struct radeon_uvd_clock_voltage_dependency_entry {
|
|
|
|
u32 vclk;
|
|
|
|
u32 dclk;
|
|
|
|
u16 v;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_uvd_clock_voltage_dependency_table {
|
|
|
|
u8 count;
|
|
|
|
struct radeon_uvd_clock_voltage_dependency_entry *entries;
|
|
|
|
};
|
|
|
|
|
2013-05-10 04:37:28 +08:00
|
|
|
struct radeon_vce_clock_voltage_dependency_entry {
|
|
|
|
u32 ecclk;
|
|
|
|
u32 evclk;
|
|
|
|
u16 v;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_vce_clock_voltage_dependency_table {
|
|
|
|
u8 count;
|
|
|
|
struct radeon_vce_clock_voltage_dependency_entry *entries;
|
|
|
|
};
|
|
|
|
|
2013-03-21 01:00:18 +08:00
|
|
|
struct radeon_ppm_table {
|
|
|
|
u8 ppm_design;
|
|
|
|
u16 cpu_core_number;
|
|
|
|
u32 platform_tdp;
|
|
|
|
u32 small_ac_platform_tdp;
|
|
|
|
u32 platform_tdc;
|
|
|
|
u32 small_ac_platform_tdc;
|
|
|
|
u32 apu_tdp;
|
|
|
|
u32 dgpu_tdp;
|
|
|
|
u32 dgpu_ulv_power;
|
|
|
|
u32 tj_max;
|
|
|
|
};
|
|
|
|
|
2013-05-07 00:15:33 +08:00
|
|
|
struct radeon_cac_tdp_table {
|
|
|
|
u16 tdp;
|
|
|
|
u16 configurable_tdp;
|
|
|
|
u16 tdc;
|
|
|
|
u16 battery_power_limit;
|
|
|
|
u16 small_power_limit;
|
|
|
|
u16 low_cac_leakage;
|
|
|
|
u16 high_cac_leakage;
|
|
|
|
u16 maximum_power_delivery_limit;
|
|
|
|
};
|
|
|
|
|
2012-11-15 08:57:42 +08:00
|
|
|
struct radeon_dpm_dynamic_state {
|
|
|
|
struct radeon_clock_voltage_dependency_table vddc_dependency_on_sclk;
|
|
|
|
struct radeon_clock_voltage_dependency_table vddci_dependency_on_mclk;
|
|
|
|
struct radeon_clock_voltage_dependency_table vddc_dependency_on_mclk;
|
2013-05-07 02:37:56 +08:00
|
|
|
struct radeon_clock_voltage_dependency_table mvdd_dependency_on_mclk;
|
2013-03-23 03:59:10 +08:00
|
|
|
struct radeon_clock_voltage_dependency_table vddc_dependency_on_dispclk;
|
2013-04-20 07:11:37 +08:00
|
|
|
struct radeon_uvd_clock_voltage_dependency_table uvd_clock_voltage_dependency_table;
|
2013-05-10 04:37:28 +08:00
|
|
|
struct radeon_vce_clock_voltage_dependency_table vce_clock_voltage_dependency_table;
|
2013-05-10 04:42:33 +08:00
|
|
|
struct radeon_clock_voltage_dependency_table samu_clock_voltage_dependency_table;
|
|
|
|
struct radeon_clock_voltage_dependency_table acp_clock_voltage_dependency_table;
|
2012-11-15 08:57:42 +08:00
|
|
|
struct radeon_clock_array valid_sclk_values;
|
|
|
|
struct radeon_clock_array valid_mclk_values;
|
|
|
|
struct radeon_clock_and_voltage_limits max_clock_voltage_on_dc;
|
|
|
|
struct radeon_clock_and_voltage_limits max_clock_voltage_on_ac;
|
|
|
|
u32 mclk_sclk_ratio;
|
|
|
|
u32 sclk_mclk_delta;
|
|
|
|
u16 vddc_vddci_delta;
|
|
|
|
u16 min_vddc_for_pcie_gen2;
|
|
|
|
struct radeon_cac_leakage_table cac_leakage_table;
|
2013-03-21 00:30:25 +08:00
|
|
|
struct radeon_phase_shedding_limits_table phase_shedding_limits_table;
|
2013-03-21 01:00:18 +08:00
|
|
|
struct radeon_ppm_table *ppm_table;
|
2013-05-07 00:15:33 +08:00
|
|
|
struct radeon_cac_tdp_table *cac_tdp_table;
|
2012-11-15 08:57:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct radeon_dpm_fan {
|
|
|
|
u16 t_min;
|
|
|
|
u16 t_med;
|
|
|
|
u16 t_high;
|
|
|
|
u16 pwm_min;
|
|
|
|
u16 pwm_med;
|
|
|
|
u16 pwm_high;
|
|
|
|
u8 t_hyst;
|
|
|
|
u32 cycle_delay;
|
|
|
|
u16 t_max;
|
2014-09-15 12:15:22 +08:00
|
|
|
u8 control_mode;
|
|
|
|
u16 default_max_fan_pwm;
|
|
|
|
u16 default_fan_output_sensitivity;
|
|
|
|
u16 fan_output_sensitivity;
|
2012-11-15 08:57:42 +08:00
|
|
|
bool ucode_fan_control;
|
|
|
|
};
|
|
|
|
|
2013-03-19 05:03:01 +08:00
|
|
|
/* PCIE generation; GEN_INVALID (0xffff) is a sentinel outside the range. */
enum radeon_pcie_gen {
	RADEON_PCIE_GEN1 = 0,
	RADEON_PCIE_GEN2 = 1,
	RADEON_PCIE_GEN3 = 2,
	RADEON_PCIE_GEN_INVALID = 0xffff
};
|
|
|
|
|
2013-07-03 06:38:02 +08:00
|
|
|
/* Performance level that can be forced on dpm; AUTO is the zero value. */
enum radeon_dpm_forced_level {
	RADEON_DPM_FORCED_LEVEL_AUTO = 0,
	RADEON_DPM_FORCED_LEVEL_LOW = 1,
	RADEON_DPM_FORCED_LEVEL_HIGH = 2,
};
|
|
|
|
|
2013-09-05 04:13:56 +08:00
|
|
|
struct radeon_vce_state {
|
|
|
|
/* vce clocks */
|
|
|
|
u32 evclk;
|
|
|
|
u32 ecclk;
|
|
|
|
/* gpu clocks */
|
|
|
|
u32 sclk;
|
|
|
|
u32 mclk;
|
|
|
|
u8 clk_idx;
|
|
|
|
u8 pstate;
|
|
|
|
};
|
|
|
|
|
2013-04-13 01:55:22 +08:00
|
|
|
struct radeon_dpm {
|
|
|
|
struct radeon_ps *ps;
|
|
|
|
/* number of valid power states */
|
|
|
|
int num_ps;
|
|
|
|
/* current power state that is active */
|
|
|
|
struct radeon_ps *current_ps;
|
|
|
|
/* requested power state */
|
|
|
|
struct radeon_ps *requested_ps;
|
|
|
|
/* boot up power state */
|
|
|
|
struct radeon_ps *boot_ps;
|
|
|
|
/* default uvd power state */
|
|
|
|
struct radeon_ps *uvd_ps;
|
2013-09-05 04:13:56 +08:00
|
|
|
/* vce requirements */
|
|
|
|
struct radeon_vce_state vce_states[RADEON_MAX_VCE_LEVELS];
|
|
|
|
enum radeon_vce_level vce_level;
|
2013-04-13 01:55:22 +08:00
|
|
|
enum radeon_pm_state_type state;
|
|
|
|
enum radeon_pm_state_type user_state;
|
|
|
|
u32 platform_caps;
|
|
|
|
u32 voltage_response_time;
|
|
|
|
u32 backbias_response_time;
|
|
|
|
void *priv;
|
|
|
|
u32 new_active_crtcs;
|
|
|
|
int new_active_crtc_count;
|
|
|
|
u32 current_active_crtcs;
|
|
|
|
int current_active_crtc_count;
|
2015-03-19 05:05:10 +08:00
|
|
|
bool single_display;
|
2012-11-15 08:57:42 +08:00
|
|
|
struct radeon_dpm_dynamic_state dyn_state;
|
|
|
|
struct radeon_dpm_fan fan;
|
|
|
|
u32 tdp_limit;
|
|
|
|
u32 near_tdp_limit;
|
2013-06-26 05:56:16 +08:00
|
|
|
u32 near_tdp_limit_adjusted;
|
2012-11-15 08:57:42 +08:00
|
|
|
u32 sq_ramping_threshold;
|
|
|
|
u32 cac_leakage;
|
|
|
|
u16 tdp_od_limit;
|
|
|
|
u32 tdp_adjustment;
|
|
|
|
u16 load_line_slope;
|
|
|
|
bool power_control;
|
2012-11-30 23:56:57 +08:00
|
|
|
bool ac_power;
|
2013-04-13 01:55:22 +08:00
|
|
|
/* special states active */
|
|
|
|
bool thermal_active;
|
2013-06-22 03:12:57 +08:00
|
|
|
bool uvd_active;
|
2013-08-21 08:29:05 +08:00
|
|
|
bool vce_active;
|
2013-04-13 01:55:22 +08:00
|
|
|
/* thermal handling */
|
|
|
|
struct radeon_dpm_thermal thermal;
|
2013-07-03 06:38:02 +08:00
|
|
|
/* forced levels */
|
|
|
|
enum radeon_dpm_forced_level forced_level;
|
2013-07-25 00:12:49 +08:00
|
|
|
/* track UVD streams */
|
|
|
|
unsigned sd;
|
|
|
|
unsigned hd;
|
2013-04-13 01:55:22 +08:00
|
|
|
};
|
|
|
|
|
2013-07-25 00:12:49 +08:00
|
|
|
/* dpm hooks for switching UVD/VCE on or off (implemented elsewhere) */
void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable);
void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable);
|
2013-04-13 01:55:22 +08:00
|
|
|
|
2009-07-14 03:04:08 +08:00
|
|
|
struct radeon_pm {
|
2009-12-23 06:02:16 +08:00
|
|
|
struct mutex mutex;
|
2012-05-11 20:57:18 +08:00
|
|
|
/* write locked while reprogramming mclk */
|
|
|
|
struct rw_semaphore mclk_lock;
|
2010-04-23 02:03:55 +08:00
|
|
|
u32 active_crtcs;
|
|
|
|
int active_crtc_count;
|
2009-12-23 06:02:16 +08:00
|
|
|
int req_vblank;
|
2010-03-03 05:06:51 +08:00
|
|
|
bool vblank_sync;
|
2009-07-14 03:04:08 +08:00
|
|
|
fixed20_12 max_bandwidth;
|
|
|
|
fixed20_12 igp_sideport_mclk;
|
|
|
|
fixed20_12 igp_system_mclk;
|
|
|
|
fixed20_12 igp_ht_link_clk;
|
|
|
|
fixed20_12 igp_ht_link_width;
|
|
|
|
fixed20_12 k8_bandwidth;
|
|
|
|
fixed20_12 sideport_bandwidth;
|
|
|
|
fixed20_12 ht_bandwidth;
|
|
|
|
fixed20_12 core_bandwidth;
|
|
|
|
fixed20_12 sclk;
|
2010-03-17 08:54:38 +08:00
|
|
|
fixed20_12 mclk;
|
2009-07-14 03:04:08 +08:00
|
|
|
fixed20_12 needed_bandwidth;
|
2011-02-03 07:42:03 +08:00
|
|
|
struct radeon_power_state *power_state;
|
2009-12-29 02:58:44 +08:00
|
|
|
/* number of valid power states */
|
|
|
|
int num_power_states;
|
2010-04-23 02:03:55 +08:00
|
|
|
int current_power_state_index;
|
|
|
|
int current_clock_mode_index;
|
|
|
|
int requested_power_state_index;
|
|
|
|
int requested_clock_mode_index;
|
|
|
|
int default_power_state_index;
|
|
|
|
u32 current_sclk;
|
|
|
|
u32 current_mclk;
|
2011-04-13 02:49:24 +08:00
|
|
|
u16 current_vddc;
|
|
|
|
u16 current_vddci;
|
2011-01-07 10:19:26 +08:00
|
|
|
u32 default_sclk;
|
|
|
|
u32 default_mclk;
|
2011-04-13 02:49:24 +08:00
|
|
|
u16 default_vddc;
|
|
|
|
u16 default_vddci;
|
2010-03-11 23:01:17 +08:00
|
|
|
struct radeon_i2c_chan *i2c_bus;
|
2010-05-08 03:10:16 +08:00
|
|
|
/* selected pm method */
|
|
|
|
enum radeon_pm_method pm_method;
|
|
|
|
/* dynpm power management */
|
|
|
|
struct delayed_work dynpm_idle_work;
|
|
|
|
enum radeon_dynpm_state dynpm_state;
|
|
|
|
enum radeon_dynpm_action dynpm_planned_action;
|
|
|
|
unsigned long dynpm_action_timeout;
|
|
|
|
bool dynpm_can_upclock;
|
|
|
|
bool dynpm_can_downclock;
|
|
|
|
/* profile-based power management */
|
|
|
|
enum radeon_pm_profile_type profile;
|
|
|
|
int profile_index;
|
|
|
|
struct radeon_pm_profile profiles[PM_PROFILE_MAX];
|
2010-07-03 00:58:16 +08:00
|
|
|
/* internal thermal controller on rv6xx+ */
|
|
|
|
enum radeon_int_thermal_type int_thermal_type;
|
|
|
|
struct device *int_hwmon_dev;
|
2014-09-08 14:51:49 +08:00
|
|
|
/* fan control parameters */
|
|
|
|
bool no_fan;
|
|
|
|
u8 fan_pulses_per_revolution;
|
|
|
|
u8 fan_min_rpm;
|
|
|
|
u8 fan_max_rpm;
|
2013-04-13 01:55:22 +08:00
|
|
|
/* dpm */
|
|
|
|
bool dpm_enabled;
|
2015-10-23 22:38:52 +08:00
|
|
|
bool sysfs_initialized;
|
2013-04-13 01:55:22 +08:00
|
|
|
struct radeon_dpm dpm;
|
2009-07-14 03:04:08 +08:00
|
|
|
};
|
|
|
|
|
2011-11-04 22:09:41 +08:00
|
|
|
/*
 * Look up a power state by type and instance number.
 * NOTE(review): the meaning of the returned int (presumably an index into
 * the power state table) is defined by the implementation, not shown here.
 */
int radeon_pm_get_type_index(struct radeon_device *rdev,
			     enum radeon_pm_state_type ps_type,
			     int instance);
|
2013-04-08 18:41:29 +08:00
|
|
|
/*
 * UVD
 */
/* handle counts: default and upper bound (array size in struct radeon_uvd) */
#define RADEON_DEFAULT_UVD_HANDLES	10
#define RADEON_MAX_UVD_HANDLES		30
/* sizes in bytes */
#define RADEON_UVD_STACK_SIZE		(200*1024)
#define RADEON_UVD_HEAP_SIZE		(256*1024)
#define RADEON_UVD_SESSION_SIZE		(50*1024)
|
2013-04-08 18:41:29 +08:00
|
|
|
|
|
|
|
struct radeon_uvd {
|
2016-04-07 03:33:51 +08:00
|
|
|
bool fw_header_present;
|
2013-04-08 18:41:29 +08:00
|
|
|
struct radeon_bo *vcpu_bo;
|
|
|
|
void *cpu_addr;
|
|
|
|
uint64_t gpu_addr;
|
2016-04-07 03:33:52 +08:00
|
|
|
unsigned max_handles;
|
2013-04-08 18:41:29 +08:00
|
|
|
atomic_t handles[RADEON_MAX_UVD_HANDLES];
|
|
|
|
struct drm_file *filp[RADEON_MAX_UVD_HANDLES];
|
2013-08-06 00:41:20 +08:00
|
|
|
unsigned img_size[RADEON_MAX_UVD_HANDLES];
|
2013-04-18 21:25:59 +08:00
|
|
|
struct delayed_work idle_work;
|
2013-04-08 18:41:29 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* UVD entry points (implemented elsewhere) */
int radeon_uvd_init(struct radeon_device *rdev);
void radeon_uvd_fini(struct radeon_device *rdev);
int radeon_uvd_suspend(struct radeon_device *rdev);
int radeon_uvd_resume(struct radeon_device *rdev);
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence);
int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence);
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
				       uint32_t allowed_domains);
void radeon_uvd_free_handles(struct radeon_device *rdev,
			     struct drm_file *filp);
int radeon_uvd_cs_parse(struct radeon_cs_parser *parser);
void radeon_uvd_note_usage(struct radeon_device *rdev);
/* results are returned through the three optimal_* pointers */
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
				  unsigned vclk, unsigned dclk,
				  unsigned vco_min, unsigned vco_max,
				  unsigned fb_factor, unsigned fb_mask,
				  unsigned pd_min, unsigned pd_max,
				  unsigned pd_even,
				  unsigned *optimal_fb_div,
				  unsigned *optimal_vclk_div,
				  unsigned *optimal_dclk_div);
int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
				unsigned cg_upll_func_cntl);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2013-05-23 18:10:04 +08:00
|
|
|
/*
 * VCE
 */
/* array size of the per-handle members of struct radeon_vce */
#define RADEON_MAX_VCE_HANDLES	16
|
|
|
|
|
|
|
|
struct radeon_vce {
|
|
|
|
struct radeon_bo *vcpu_bo;
|
|
|
|
uint64_t gpu_addr;
|
2014-01-24 00:50:49 +08:00
|
|
|
unsigned fw_version;
|
|
|
|
unsigned fb_version;
|
2013-05-23 18:10:04 +08:00
|
|
|
atomic_t handles[RADEON_MAX_VCE_HANDLES];
|
|
|
|
struct drm_file *filp[RADEON_MAX_VCE_HANDLES];
|
2014-05-06 03:42:18 +08:00
|
|
|
unsigned img_size[RADEON_MAX_VCE_HANDLES];
|
2013-08-23 23:56:26 +08:00
|
|
|
struct delayed_work idle_work;
|
2015-05-12 04:01:53 +08:00
|
|
|
uint32_t keyselect;
|
2013-05-23 18:10:04 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* VCE entry points (implemented elsewhere) */
int radeon_vce_init(struct radeon_device *rdev);
void radeon_vce_fini(struct radeon_device *rdev);
int radeon_vce_suspend(struct radeon_device *rdev);
int radeon_vce_resume(struct radeon_device *rdev);
int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence);
int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence);
void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp);
void radeon_vce_note_usage(struct radeon_device *rdev);
int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi, unsigned size);
int radeon_vce_cs_parse(struct radeon_cs_parser *p);
bool radeon_vce_semaphore_emit(struct radeon_device *rdev,
			       struct radeon_ring *ring,
			       struct radeon_semaphore *semaphore,
			       bool emit_wait);
void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
void radeon_vce_fence_emit(struct radeon_device *rdev,
			   struct radeon_fence *fence);
int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
|
|
|
|
|
2013-08-01 04:51:33 +08:00
|
|
|
struct r600_audio_pin {
|
2012-04-29 05:35:20 +08:00
|
|
|
int channels;
|
|
|
|
int rate;
|
|
|
|
int bits_per_sample;
|
|
|
|
u8 status_bits;
|
|
|
|
u8 category_code;
|
2013-08-01 04:51:33 +08:00
|
|
|
u32 offset;
|
|
|
|
bool connected;
|
|
|
|
u32 id;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct r600_audio {
|
|
|
|
bool enabled;
|
|
|
|
struct r600_audio_pin pin[RADEON_MAX_AFMT_BLOCKS];
|
|
|
|
int num_pins;
|
2014-12-02 02:49:39 +08:00
|
|
|
struct radeon_audio_funcs *hdmi_funcs;
|
|
|
|
struct radeon_audio_funcs *dp_funcs;
|
|
|
|
struct radeon_audio_basic_funcs *funcs;
|
2012-04-29 05:35:20 +08:00
|
|
|
};
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Benchmarking
 */
void radeon_benchmark(struct radeon_device *rdev, int test_number);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
|
2009-07-21 17:23:57 +08:00
|
|
|
/*
 * Testing
 */
void radeon_test_moves(struct radeon_device *rdev);
void radeon_test_ring_sync(struct radeon_device *rdev,
			   struct radeon_ring *cpA,
			   struct radeon_ring *cpB);
void radeon_test_syncing(struct radeon_device *rdev);
|
2009-07-21 17:23:57 +08:00
|
|
|
|
2014-08-07 15:36:03 +08:00
|
|
|
/*
 * MMU Notifier
 *
 * Without CONFIG_MMU_NOTIFIER the two entry points are stubbed out:
 * registration always fails with -ENODEV and unregistration does nothing.
 */
#if defined(CONFIG_MMU_NOTIFIER)
int radeon_mn_register(struct radeon_bo *bo, unsigned long addr);
void radeon_mn_unregister(struct radeon_bo *bo);
#else
static inline int radeon_mn_register(struct radeon_bo *bo, unsigned long addr)
{
	return -ENODEV;
}
static inline void radeon_mn_unregister(struct radeon_bo *bo) {}
#endif
|
2009-07-21 17:23:57 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Debugfs
 */
/* A set of debugfs files: pointer to the list plus its length. */
struct radeon_debugfs {
	struct drm_info_list	*files;
	unsigned		num_files;
};
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/* debugfs registration helpers (implemented elsewhere) */
int radeon_debugfs_add_files(struct radeon_device *rdev,
			     struct drm_info_list *files,
			     unsigned nfiles);
int radeon_debugfs_fence_init(struct radeon_device *rdev);
|
|
|
|
|
2013-08-13 17:56:50 +08:00
|
|
|
/*
|
|
|
|
* ASIC ring specific functions.
|
|
|
|
*/
|
|
|
|
struct radeon_asic_ring {
|
|
|
|
/* ring read/write ptr handling */
|
|
|
|
u32 (*get_rptr)(struct radeon_device *rdev, struct radeon_ring *ring);
|
|
|
|
u32 (*get_wptr)(struct radeon_device *rdev, struct radeon_ring *ring);
|
|
|
|
void (*set_wptr)(struct radeon_device *rdev, struct radeon_ring *ring);
|
|
|
|
|
|
|
|
/* validating and patching of IBs */
|
|
|
|
int (*ib_parse)(struct radeon_device *rdev, struct radeon_ib *ib);
|
|
|
|
int (*cs_parse)(struct radeon_cs_parser *p);
|
|
|
|
|
|
|
|
/* command emmit functions */
|
|
|
|
void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
|
|
|
|
void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
|
2014-07-31 17:43:49 +08:00
|
|
|
void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring);
|
2013-11-12 19:58:05 +08:00
|
|
|
bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
|
2013-08-13 17:56:50 +08:00
|
|
|
struct radeon_semaphore *semaphore, bool emit_wait);
|
2014-11-19 21:01:19 +08:00
|
|
|
void (*vm_flush)(struct radeon_device *rdev, struct radeon_ring *ring,
|
|
|
|
unsigned vm_id, uint64_t pd_addr);
|
2013-08-13 17:56:50 +08:00
|
|
|
|
|
|
|
/* testing functions */
|
|
|
|
int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
|
|
|
|
/* deprecated */
|
|
|
|
void (*ring_start)(struct radeon_device *rdev, struct radeon_ring *cp);
|
|
|
|
};
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* ASIC specific functions.
|
|
|
|
*/
|
|
|
|
struct radeon_asic {
|
2009-06-17 19:28:30 +08:00
|
|
|
int (*init)(struct radeon_device *rdev);
|
2009-09-08 08:10:24 +08:00
|
|
|
void (*fini)(struct radeon_device *rdev);
|
|
|
|
int (*resume)(struct radeon_device *rdev);
|
|
|
|
int (*suspend)(struct radeon_device *rdev);
|
2009-09-21 12:33:58 +08:00
|
|
|
void (*vga_set_state)(struct radeon_device *rdev, bool state);
|
2016-03-18 23:58:38 +08:00
|
|
|
int (*asic_reset)(struct radeon_device *rdev, bool hard);
|
2014-07-31 17:43:48 +08:00
|
|
|
/* Flush the HDP cache via MMIO */
|
|
|
|
void (*mmio_hdp_flush)(struct radeon_device *rdev);
|
2012-02-24 07:10:29 +08:00
|
|
|
/* check if 3D engine is idle */
|
|
|
|
bool (*gui_idle)(struct radeon_device *rdev);
|
|
|
|
/* wait for mc_idle */
|
|
|
|
int (*mc_wait_for_idle)(struct radeon_device *rdev);
|
2013-02-14 23:04:02 +08:00
|
|
|
/* get the reference clock */
|
|
|
|
u32 (*get_xclk)(struct radeon_device *rdev);
|
2013-01-24 23:35:23 +08:00
|
|
|
/* get the gpu clock counter */
|
|
|
|
uint64_t (*get_gpu_clock_counter)(struct radeon_device *rdev);
|
2014-10-01 21:17:12 +08:00
|
|
|
/* get register for info ioctl */
|
|
|
|
int (*get_allowed_info_register)(struct radeon_device *rdev, u32 reg, u32 *val);
|
2012-02-24 07:10:29 +08:00
|
|
|
/* gart */
|
2012-02-24 06:53:46 +08:00
|
|
|
struct {
|
|
|
|
void (*tlb_flush)(struct radeon_device *rdev);
|
2015-01-21 16:36:35 +08:00
|
|
|
uint64_t (*get_page_entry)(uint64_t addr, uint32_t flags);
|
2014-06-04 21:29:57 +08:00
|
|
|
void (*set_page)(struct radeon_device *rdev, unsigned i,
|
2015-01-21 16:36:35 +08:00
|
|
|
uint64_t entry);
|
2012-02-24 06:53:46 +08:00
|
|
|
} gart;
|
2012-08-07 02:21:10 +08:00
|
|
|
struct {
|
|
|
|
int (*init)(struct radeon_device *rdev);
|
|
|
|
void (*fini)(struct radeon_device *rdev);
|
2014-07-31 03:05:17 +08:00
|
|
|
void (*copy_pages)(struct radeon_device *rdev,
|
|
|
|
struct radeon_ib *ib,
|
|
|
|
uint64_t pe, uint64_t src,
|
|
|
|
unsigned count);
|
|
|
|
void (*write_pages)(struct radeon_device *rdev,
|
|
|
|
struct radeon_ib *ib,
|
|
|
|
uint64_t pe,
|
|
|
|
uint64_t addr, unsigned count,
|
|
|
|
uint32_t incr, uint32_t flags);
|
|
|
|
void (*set_pages)(struct radeon_device *rdev,
|
|
|
|
struct radeon_ib *ib,
|
|
|
|
uint64_t pe,
|
|
|
|
uint64_t addr, unsigned count,
|
|
|
|
uint32_t incr, uint32_t flags);
|
|
|
|
void (*pad_ib)(struct radeon_ib *ib);
|
2012-08-07 02:21:10 +08:00
|
|
|
} vm;
|
2012-02-24 07:10:29 +08:00
|
|
|
/* ring specific callbacks */
|
2015-11-30 00:12:41 +08:00
|
|
|
const struct radeon_asic_ring *ring[RADEON_NUM_RINGS];
|
2012-02-24 07:10:29 +08:00
|
|
|
/* irqs */
|
2012-02-24 06:53:43 +08:00
|
|
|
struct {
|
|
|
|
int (*set)(struct radeon_device *rdev);
|
|
|
|
int (*process)(struct radeon_device *rdev);
|
|
|
|
} irq;
|
2012-02-24 07:10:29 +08:00
|
|
|
/* displays */
|
2012-02-24 06:53:47 +08:00
|
|
|
struct {
|
|
|
|
/* display watermarks */
|
|
|
|
void (*bandwidth_update)(struct radeon_device *rdev);
|
|
|
|
/* get frame count */
|
|
|
|
u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc);
|
|
|
|
/* wait for vblank */
|
|
|
|
void (*wait_for_vblank)(struct radeon_device *rdev, int crtc);
|
2012-08-03 23:39:43 +08:00
|
|
|
/* set backlight level */
|
|
|
|
void (*set_backlight_level)(struct radeon_encoder *radeon_encoder, u8 level);
|
2012-09-14 21:59:26 +08:00
|
|
|
/* get backlight level */
|
|
|
|
u8 (*get_backlight_level)(struct radeon_encoder *radeon_encoder);
|
2013-04-18 23:32:16 +08:00
|
|
|
/* audio callbacks */
|
|
|
|
void (*hdmi_enable)(struct drm_encoder *encoder, bool enable);
|
|
|
|
void (*hdmi_setmode)(struct drm_encoder *encoder, struct drm_display_mode *mode);
|
2012-02-24 06:53:47 +08:00
|
|
|
} display;
|
2012-02-24 07:10:29 +08:00
|
|
|
/* copy functions for bo handling */
|
2012-02-24 06:53:42 +08:00
|
|
|
struct {
|
2014-09-05 02:01:53 +08:00
|
|
|
struct radeon_fence *(*blit)(struct radeon_device *rdev,
|
|
|
|
uint64_t src_offset,
|
|
|
|
uint64_t dst_offset,
|
|
|
|
unsigned num_gpu_pages,
|
|
|
|
struct reservation_object *resv);
|
2012-02-24 06:53:42 +08:00
|
|
|
u32 blit_ring_index;
|
2014-09-05 02:01:53 +08:00
|
|
|
struct radeon_fence *(*dma)(struct radeon_device *rdev,
|
|
|
|
uint64_t src_offset,
|
|
|
|
uint64_t dst_offset,
|
|
|
|
unsigned num_gpu_pages,
|
|
|
|
struct reservation_object *resv);
|
2012-02-24 06:53:42 +08:00
|
|
|
u32 dma_ring_index;
|
|
|
|
/* method used for bo copy */
|
2014-09-05 02:01:53 +08:00
|
|
|
struct radeon_fence *(*copy)(struct radeon_device *rdev,
|
|
|
|
uint64_t src_offset,
|
|
|
|
uint64_t dst_offset,
|
|
|
|
unsigned num_gpu_pages,
|
|
|
|
struct reservation_object *resv);
|
2012-02-24 06:53:42 +08:00
|
|
|
/* ring used for bo copies */
|
|
|
|
u32 copy_ring_index;
|
|
|
|
} copy;
|
2012-02-24 07:10:29 +08:00
|
|
|
/* surfaces */
|
2012-02-24 06:53:49 +08:00
|
|
|
struct {
|
|
|
|
int (*set_reg)(struct radeon_device *rdev, int reg,
|
|
|
|
uint32_t tiling_flags, uint32_t pitch,
|
|
|
|
uint32_t offset, uint32_t obj_size);
|
|
|
|
void (*clear_reg)(struct radeon_device *rdev, int reg);
|
|
|
|
} surface;
|
2012-02-24 07:10:29 +08:00
|
|
|
/* hotplug detect */
|
2012-02-24 06:53:39 +08:00
|
|
|
struct {
|
|
|
|
void (*init)(struct radeon_device *rdev);
|
|
|
|
void (*fini)(struct radeon_device *rdev);
|
|
|
|
bool (*sense)(struct radeon_device *rdev, enum radeon_hpd_id hpd);
|
|
|
|
void (*set_polarity)(struct radeon_device *rdev, enum radeon_hpd_id hpd);
|
|
|
|
} hpd;
|
2013-04-13 01:55:22 +08:00
|
|
|
/* static power management */
|
2012-02-24 06:53:41 +08:00
|
|
|
struct {
|
|
|
|
void (*misc)(struct radeon_device *rdev);
|
|
|
|
void (*prepare)(struct radeon_device *rdev);
|
|
|
|
void (*finish)(struct radeon_device *rdev);
|
|
|
|
void (*init_profile)(struct radeon_device *rdev);
|
|
|
|
void (*get_dynpm_state)(struct radeon_device *rdev);
|
2012-02-24 06:53:48 +08:00
|
|
|
uint32_t (*get_engine_clock)(struct radeon_device *rdev);
|
|
|
|
void (*set_engine_clock)(struct radeon_device *rdev, uint32_t eng_clock);
|
|
|
|
uint32_t (*get_memory_clock)(struct radeon_device *rdev);
|
|
|
|
void (*set_memory_clock)(struct radeon_device *rdev, uint32_t mem_clock);
|
|
|
|
int (*get_pcie_lanes)(struct radeon_device *rdev);
|
|
|
|
void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes);
|
|
|
|
void (*set_clock_gating)(struct radeon_device *rdev, int enable);
|
2013-04-08 18:41:30 +08:00
|
|
|
int (*set_uvd_clocks)(struct radeon_device *rdev, u32 vclk, u32 dclk);
|
2013-08-21 08:01:18 +08:00
|
|
|
int (*set_vce_clocks)(struct radeon_device *rdev, u32 evclk, u32 ecclk);
|
2013-06-22 02:38:03 +08:00
|
|
|
int (*get_temperature)(struct radeon_device *rdev);
|
2012-02-24 06:53:41 +08:00
|
|
|
} pm;
|
2013-04-13 01:55:22 +08:00
|
|
|
/* dynamic power management */
|
|
|
|
struct {
|
|
|
|
int (*init)(struct radeon_device *rdev);
|
|
|
|
void (*setup_asic)(struct radeon_device *rdev);
|
|
|
|
int (*enable)(struct radeon_device *rdev);
|
2013-12-20 00:37:22 +08:00
|
|
|
int (*late_enable)(struct radeon_device *rdev);
|
2013-04-13 01:55:22 +08:00
|
|
|
void (*disable)(struct radeon_device *rdev);
|
2013-01-17 01:52:04 +08:00
|
|
|
int (*pre_set_power_state)(struct radeon_device *rdev);
|
2013-04-13 01:55:22 +08:00
|
|
|
int (*set_power_state)(struct radeon_device *rdev);
|
2013-01-17 01:52:04 +08:00
|
|
|
void (*post_set_power_state)(struct radeon_device *rdev);
|
2013-04-13 01:55:22 +08:00
|
|
|
void (*display_configuration_changed)(struct radeon_device *rdev);
|
|
|
|
void (*fini)(struct radeon_device *rdev);
|
|
|
|
u32 (*get_sclk)(struct radeon_device *rdev, bool low);
|
|
|
|
u32 (*get_mclk)(struct radeon_device *rdev, bool low);
|
|
|
|
void (*print_power_state)(struct radeon_device *rdev, struct radeon_ps *ps);
|
2013-06-28 21:28:39 +08:00
|
|
|
void (*debugfs_print_current_performance_level)(struct radeon_device *rdev, struct seq_file *m);
|
2013-07-03 06:38:02 +08:00
|
|
|
int (*force_performance_level)(struct radeon_device *rdev, enum radeon_dpm_forced_level level);
|
2013-07-08 23:35:06 +08:00
|
|
|
bool (*vblank_too_short)(struct radeon_device *rdev);
|
2013-08-01 06:13:23 +08:00
|
|
|
void (*powergate_uvd)(struct radeon_device *rdev, bool gate);
|
2013-09-10 07:11:52 +08:00
|
|
|
void (*enable_bapm)(struct radeon_device *rdev, bool enable);
|
2014-12-08 05:10:44 +08:00
|
|
|
void (*fan_ctrl_set_mode)(struct radeon_device *rdev, u32 mode);
|
|
|
|
u32 (*fan_ctrl_get_mode)(struct radeon_device *rdev);
|
|
|
|
int (*set_fan_speed_percent)(struct radeon_device *rdev, u32 speed);
|
|
|
|
int (*get_fan_speed_percent)(struct radeon_device *rdev, u32 *speed);
|
2014-09-30 22:12:17 +08:00
|
|
|
u32 (*get_current_sclk)(struct radeon_device *rdev);
|
|
|
|
u32 (*get_current_mclk)(struct radeon_device *rdev);
|
2013-04-13 01:55:22 +08:00
|
|
|
} dpm;
|
2010-11-21 23:59:01 +08:00
|
|
|
/* pageflipping */
|
2012-02-24 06:53:40 +08:00
|
|
|
struct {
|
2016-04-01 17:51:34 +08:00
|
|
|
void (*page_flip)(struct radeon_device *rdev, int crtc, u64 crtc_base, bool async);
|
2014-05-27 22:49:20 +08:00
|
|
|
bool (*page_flip_pending)(struct radeon_device *rdev, int crtc);
|
2012-02-24 06:53:40 +08:00
|
|
|
} pflip;
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2009-09-11 21:55:33 +08:00
|
|
|
/*
|
|
|
|
* Asic structures
|
|
|
|
*/
|
2009-09-01 13:25:57 +08:00
|
|
|
struct r100_asic {
|
2010-03-09 22:45:10 +08:00
|
|
|
const unsigned *reg_safe_bm;
|
|
|
|
unsigned reg_safe_bm_size;
|
|
|
|
u32 hdp_cntl;
|
2009-09-01 13:25:57 +08:00
|
|
|
};
|
|
|
|
|
2009-09-11 21:55:33 +08:00
|
|
|
struct r300_asic {
|
2010-03-09 22:45:10 +08:00
|
|
|
const unsigned *reg_safe_bm;
|
|
|
|
unsigned reg_safe_bm_size;
|
|
|
|
u32 resync_scratch;
|
|
|
|
u32 hdp_cntl;
|
2009-09-11 21:55:33 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * r600 configuration: maximum resource counts, export sizes, tiling
 * parameters and backend/SIMD information.  All members are plain
 * unsigned values.
 */
struct r600_asic {
	unsigned		max_pipes;
	unsigned		max_tile_pipes;
	unsigned		max_simds;
	unsigned		max_backends;
	unsigned		max_gprs;
	unsigned		max_threads;
	unsigned		max_stack_entries;
	unsigned		max_hw_contexts;
	unsigned		max_gs_threads;
	unsigned		sx_max_export_size;
	unsigned		sx_max_export_pos_size;
	unsigned		sx_max_export_smx_size;
	unsigned		sq_num_cf_insts;
	unsigned		tiling_nbanks;
	unsigned		tiling_npipes;
	unsigned		tiling_group_size;
	unsigned		tile_config;
	unsigned		backend_map;
	unsigned		active_simds;
};
|
|
|
|
|
|
|
|
/*
 * rv770 configuration: maximum resource counts, export sizes, fifo sizes,
 * tiling parameters and backend/SIMD information.  All members are plain
 * unsigned values.
 */
struct rv770_asic {
	unsigned		max_pipes;
	unsigned		max_tile_pipes;
	unsigned		max_simds;
	unsigned		max_backends;
	unsigned		max_gprs;
	unsigned		max_threads;
	unsigned		max_stack_entries;
	unsigned		max_hw_contexts;
	unsigned		max_gs_threads;
	unsigned		sx_max_export_size;
	unsigned		sx_max_export_pos_size;
	unsigned		sx_max_export_smx_size;
	unsigned		sq_num_cf_insts;
	unsigned		sx_num_of_sets;
	unsigned		sc_prim_fifo_size;
	unsigned		sc_hiz_tile_fifo_size;
	/* "fize" is a historical misspelling of "size"; kept for users */
	unsigned		sc_earlyz_tile_fifo_fize;
	unsigned		tiling_nbanks;
	unsigned		tiling_npipes;
	unsigned		tiling_group_size;
	unsigned		tile_config;
	unsigned		backend_map;
	unsigned		active_simds;
};
|
|
|
|
|
2010-03-25 01:33:47 +08:00
|
|
|
/*
 * ASIC configuration stored in the evergreen member of
 * union radeon_asic_config.
 *
 * Every field is a plain unsigned value.
 * NOTE(review): field semantics are not established by this header - confirm
 * with the code that initialises this structure.
 */
struct evergreen_asic {
	unsigned		num_ses;
	unsigned		max_pipes;
	unsigned		max_tile_pipes;
	unsigned		max_simds;
	unsigned		max_backends;
	unsigned		max_gprs;
	unsigned		max_threads;
	unsigned		max_stack_entries;
	unsigned		max_hw_contexts;
	unsigned		max_gs_threads;
	unsigned		sx_max_export_size;
	unsigned		sx_max_export_pos_size;
	unsigned		sx_max_export_smx_size;
	unsigned		sq_num_cf_insts;
	unsigned		sx_num_of_sets;
	unsigned		sc_prim_fifo_size;
	unsigned		sc_hiz_tile_fifo_size;
	unsigned		sc_earlyz_tile_fifo_size;
	unsigned		tiling_nbanks;
	unsigned		tiling_npipes;
	unsigned		tiling_group_size;
	unsigned		tile_config;
	unsigned		backend_map;
	unsigned		active_simds;
};
|
|
|
|
|
2011-03-03 09:07:29 +08:00
|
|
|
/*
 * ASIC configuration stored in the cayman member of
 * union radeon_asic_config.
 *
 * Every field is a plain unsigned value. The fields are laid out in three
 * groups separated by blank lines, exactly as in the original declaration.
 * NOTE(review): the max_* vs. num_* split looks like "hardware limit" vs.
 * "actually present" - confirm with the code that initialises this
 * structure.
 */
struct cayman_asic {
	unsigned		max_shader_engines;
	unsigned		max_pipes_per_simd;
	unsigned		max_tile_pipes;
	unsigned		max_simds_per_se;
	unsigned		max_backends_per_se;
	unsigned		max_texture_channel_caches;
	unsigned		max_gprs;
	unsigned		max_threads;
	unsigned		max_gs_threads;
	unsigned		max_stack_entries;
	unsigned		sx_num_of_sets;
	unsigned		sx_max_export_size;
	unsigned		sx_max_export_pos_size;
	unsigned		sx_max_export_smx_size;
	unsigned		max_hw_contexts;
	unsigned		sq_num_cf_insts;
	unsigned		sc_prim_fifo_size;
	unsigned		sc_hiz_tile_fifo_size;
	unsigned		sc_earlyz_tile_fifo_size;

	unsigned		num_shader_engines;
	unsigned		num_shader_pipes_per_simd;
	unsigned		num_tile_pipes;
	unsigned		num_simds_per_se;
	unsigned		num_backends_per_se;
	unsigned		backend_disable_mask_per_asic;
	unsigned		backend_map;
	unsigned		num_texture_channel_caches;
	unsigned		mem_max_burst_length_bytes;
	unsigned		mem_row_size_in_kb;
	unsigned		shader_engine_tile_size;
	unsigned		num_gpus;
	unsigned		multi_gpu_tile_size;

	unsigned		tile_config;
	unsigned		active_simds;
};
|
|
|
|
|
2012-03-21 05:18:11 +08:00
|
|
|
/*
 * ASIC configuration stored in the si member of union radeon_asic_config.
 *
 * Scalar fields are plain unsigned values; tile_mode_array holds 32
 * uint32_t entries.
 * NOTE(review): field semantics (limits, masks, tiling modes) are not
 * established by this header - confirm with the code that initialises
 * this structure.
 */
struct si_asic {
	unsigned		max_shader_engines;
	unsigned		max_tile_pipes;
	unsigned		max_cu_per_sh;
	unsigned		max_sh_per_se;
	unsigned		max_backends_per_se;
	unsigned		max_texture_channel_caches;
	unsigned		max_gprs;
	unsigned		max_gs_threads;
	unsigned		max_hw_contexts;
	unsigned		sc_prim_fifo_size_frontend;
	unsigned		sc_prim_fifo_size_backend;
	unsigned		sc_hiz_tile_fifo_size;
	unsigned		sc_earlyz_tile_fifo_size;

	unsigned		num_tile_pipes;
	unsigned		backend_enable_mask;
	unsigned		backend_disable_mask_per_asic;
	unsigned		backend_map;
	unsigned		num_texture_channel_caches;
	unsigned		mem_max_burst_length_bytes;
	unsigned		mem_row_size_in_kb;
	unsigned		shader_engine_tile_size;
	unsigned		num_gpus;
	unsigned		multi_gpu_tile_size;

	unsigned		tile_config;
	uint32_t		tile_mode_array[32];
	uint32_t		active_cus;
};
|
|
|
|
|
2013-04-10 00:41:24 +08:00
|
|
|
/*
 * ASIC configuration stored in the cik member of union radeon_asic_config.
 *
 * Same field list as struct si_asic, plus macrotile_mode_array (16
 * uint32_t entries).
 * NOTE(review): field semantics are not established by this header -
 * confirm with the code that initialises this structure.
 */
struct cik_asic {
	unsigned		max_shader_engines;
	unsigned		max_tile_pipes;
	unsigned		max_cu_per_sh;
	unsigned		max_sh_per_se;
	unsigned		max_backends_per_se;
	unsigned		max_texture_channel_caches;
	unsigned		max_gprs;
	unsigned		max_gs_threads;
	unsigned		max_hw_contexts;
	unsigned		sc_prim_fifo_size_frontend;
	unsigned		sc_prim_fifo_size_backend;
	unsigned		sc_hiz_tile_fifo_size;
	unsigned		sc_earlyz_tile_fifo_size;

	unsigned		num_tile_pipes;
	unsigned		backend_enable_mask;
	unsigned		backend_disable_mask_per_asic;
	unsigned		backend_map;
	unsigned		num_texture_channel_caches;
	unsigned		mem_max_burst_length_bytes;
	unsigned		mem_row_size_in_kb;
	unsigned		shader_engine_tile_size;
	unsigned		num_gpus;
	unsigned		multi_gpu_tile_size;

	unsigned		tile_config;
	uint32_t		tile_mode_array[32];
	uint32_t		macrotile_mode_array[16];
	uint32_t		active_cus;
};
|
|
|
|
|
2009-06-17 19:28:30 +08:00
|
|
|
union radeon_asic_config {
|
|
|
|
struct r300_asic r300;
|
2009-09-01 13:25:57 +08:00
|
|
|
struct r100_asic r100;
|
2009-09-08 08:10:24 +08:00
|
|
|
struct r600_asic r600;
|
|
|
|
struct rv770_asic rv770;
|
2010-03-25 01:33:47 +08:00
|
|
|
struct evergreen_asic evergreen;
|
2011-03-03 09:07:29 +08:00
|
|
|
struct cayman_asic cayman;
|
2012-03-21 05:18:11 +08:00
|
|
|
struct si_asic si;
|
2013-04-10 00:41:24 +08:00
|
|
|
struct cik_asic cik;
|
2009-06-17 19:28:30 +08:00
|
|
|
};
|
|
|
|
|
2010-03-12 05:19:14 +08:00
|
|
|
/*
|
|
|
|
* asic initialization from radeon_asic.c
|
|
|
|
*/
|
|
|
|
/* Declared here, implemented in radeon_asic.c (see the section comment above). */
void radeon_agp_disable(struct radeon_device *rdev);
int radeon_asic_init(struct radeon_device *rdev);
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* IOCTL.
|
|
|
|
*/
|
|
|
|
/*
 * GEM ioctl entry points. Each takes the DRM device, an untyped pointer to
 * the ioctl argument and the calling DRM file, and returns an int.
 */
int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *filp);
int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *filp);
|
drm/radeon: add userptr support v8
This patch adds an IOCTL for turning a pointer supplied by
userspace into a buffer object.
It imposes several restrictions upon the memory being mapped:
1. It must be page aligned (both start/end addresses, i.e ptr and size).
2. It must be normal system memory, not a pointer into another map of IO
space (e.g. it must not be a GTT mmapping of another object).
3. The BO is mapped into GTT, so the maximum amount of memory mapped at
all times is still the GTT limit.
4. The BO is only mapped readonly for now, so no write support.
5. List of backing pages is only acquired once, so they represent a
snapshot of the first use.
Exporting and sharing as well as mapping of buffer objects created by
this function is forbidden and results in an -EPERM.
v2: squash all previous changes into first public version
v3: fix tabs, map readonly, don't use MM callback any more
v4: set TTM_PAGE_FLAG_SG so that TTM never messes with the pages,
pin/unpin pages on bind/unbind instead of populate/unpopulate
v5: rebased on 3.17-wip, IOCTL renamed to userptr, reject any unknown
flags, better handle READONLY flag, improve permission check
v6: fix ptr cast warning, use set_page_dirty/mark_page_accessed on unpin
v7: add warning about it's availability in the API definition
v8: drop access_ok check, fix VM mapping bits
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v4)
Reviewed-by: Jérôme Glisse <jglisse@redhat.com> (v4)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2014-08-07 15:36:00 +08:00
|
|
|
/* GEM userptr ioctl entry point; same calling convention as the other GEM ioctls. */
int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *filp);
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Further GEM ioctl entry points. The third parameter is spelled file_priv
 * in the first four declarations and filp in the rest; both are the calling
 * struct drm_file.
 */
int radeon_gem_pin_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int radeon_gem_unpin_ioctl(struct drm_device *dev, void *data,
			   struct drm_file *file_priv);
int radeon_gem_pwrite_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv);
int radeon_gem_pread_ioctl(struct drm_device *dev, void *data,
			   struct drm_file *file_priv);
int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp);
int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *filp);
int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *filp);
int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *filp);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* GEM VA and GEM op ioctl entry points; same calling convention as the other GEM ioctls. */
int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
			struct drm_file *filp);
int radeon_gem_op_ioctl(struct drm_device *dev, void *data,
			struct drm_file *filp);
|
2009-06-05 20:42:42 +08:00
|
|
|
/* CS ioctl and GEM tiling ioctl entry points; same calling convention as the GEM ioctls. */
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp);
int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp);
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2011-10-28 22:30:02 +08:00
|
|
|
/* VRAM scratch page for HDP bug, default vram page */
|
|
|
|
struct r600_vram_scratch {
|
2010-08-28 01:59:54 +08:00
|
|
|
struct radeon_bo *robj;
|
|
|
|
volatile uint32_t *ptr;
|
2011-10-28 22:30:02 +08:00
|
|
|
u64 gpu_addr;
|
2010-08-28 01:59:54 +08:00
|
|
|
};
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2012-08-16 23:11:18 +08:00
|
|
|
/*
|
|
|
|
* ACPI
|
|
|
|
*/
|
|
|
|
/*
 * Notification configuration kept in struct radeon_atif (ACPI section).
 *
 * NOTE(review): presumably "enabled" gates notification handling and
 * "command_code" is the code to match - confirm with the ACPI code.
 */
struct radeon_atif_notification_cfg {
	bool enabled;
	int command_code;
};
|
|
|
|
|
|
|
|
/*
 * Set of boolean flags kept in struct radeon_atif as "notifications".
 *
 * NOTE(review): presumably one flag per notification type the platform
 * supports - confirm with the code that fills these in.
 */
struct radeon_atif_notifications {
	bool display_switch;
	bool expansion_mode_change;
	bool thermal_state;
	bool forced_power_state;
	bool system_power_state;
	bool display_conf_change;
	bool px_gfx_switch;
	bool brightness_change;
	bool dgpu_display_event;
};
|
|
|
|
|
|
|
|
/*
 * Set of boolean flags kept in struct radeon_atif as "functions".
 *
 * NOTE(review): presumably one flag per ATIF function the platform
 * supports - confirm with the code that fills these in.
 */
struct radeon_atif_functions {
	bool system_params;
	bool sbios_requests;
	bool select_active_disp;
	bool lid_state;
	bool get_tv_standard;
	bool set_tv_standard;
	bool get_panel_expansion_mode;
	bool set_panel_expansion_mode;
	bool temperature_change;
	bool graphics_device_types;
};
|
|
|
|
|
|
|
|
struct radeon_atif {
|
|
|
|
struct radeon_atif_notifications notifications;
|
|
|
|
struct radeon_atif_functions functions;
|
|
|
|
struct radeon_atif_notification_cfg notification_cfg;
|
2012-08-03 23:39:43 +08:00
|
|
|
struct radeon_encoder *encoder_for_bl;
|
2012-08-16 23:11:18 +08:00
|
|
|
};
|
2011-11-11 01:57:26 +08:00
|
|
|
|
2012-08-16 23:13:43 +08:00
|
|
|
/*
 * Set of boolean flags kept in struct radeon_atcs as "functions".
 *
 * NOTE(review): presumably one flag per ATCS function the platform
 * supports - confirm with the code that fills these in.
 */
struct radeon_atcs_functions {
	bool get_ext_state;
	bool pcie_perf_req;
	bool pcie_dev_rdy;
	bool pcie_bus_width;
};
|
|
|
|
|
|
|
|
struct radeon_atcs {
|
|
|
|
struct radeon_atcs_functions functions;
|
|
|
|
};
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* Core structure, functions and helpers.
|
|
|
|
*/
|
|
|
|
/*
 * Function-pointer types for register accessors stored in
 * struct radeon_device (mc_rreg/mc_wreg, pll_rreg/pll_wreg,
 * pciep_rreg/pciep_wreg).
 *
 * radeon_rreg_t: takes the device and a 32-bit value, returns 32 bits.
 * radeon_wreg_t: takes the device and two 32-bit values, returns nothing.
 * NOTE(review): the integer arguments are presumably (reg) and (reg, value)
 * - the typedefs themselves do not name them.
 */
typedef uint32_t (*radeon_rreg_t)(struct radeon_device*, uint32_t);
typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t);
|
|
|
|
|
|
|
|
struct radeon_device {
|
2009-09-11 21:35:22 +08:00
|
|
|
struct device *dev;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct drm_device *ddev;
|
|
|
|
struct pci_dev *pdev;
|
2012-07-03 00:45:19 +08:00
|
|
|
struct rw_semaphore exclusive_lock;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* ASIC */
|
2009-06-17 19:28:30 +08:00
|
|
|
union radeon_asic_config config;
|
2009-06-05 20:42:42 +08:00
|
|
|
enum radeon_family family;
|
|
|
|
unsigned long flags;
|
|
|
|
int usec_timeout;
|
|
|
|
enum radeon_pll_errata pll_errata;
|
|
|
|
int num_gb_pipes;
|
2009-08-20 07:11:39 +08:00
|
|
|
int num_z_pipes;
|
2009-06-05 20:42:42 +08:00
|
|
|
int disp_priority;
|
|
|
|
/* BIOS */
|
|
|
|
uint8_t *bios;
|
|
|
|
bool is_atom_bios;
|
|
|
|
uint16_t bios_header_start;
|
2009-11-20 21:29:23 +08:00
|
|
|
struct radeon_bo *stollen_vga_memory;
|
2009-06-05 20:42:42 +08:00
|
|
|
/* Register mmio */
|
2009-06-29 16:29:12 +08:00
|
|
|
resource_size_t rmmio_base;
|
|
|
|
resource_size_t rmmio_size;
|
2012-12-02 21:06:15 +08:00
|
|
|
/* protects concurrent MM_INDEX/DATA based register access */
|
|
|
|
spinlock_t mmio_idx_lock;
|
2013-09-04 06:19:42 +08:00
|
|
|
/* protects concurrent SMC based register access */
|
|
|
|
spinlock_t smc_idx_lock;
|
2013-09-04 07:00:09 +08:00
|
|
|
/* protects concurrent PLL register access */
|
|
|
|
spinlock_t pll_idx_lock;
|
|
|
|
/* protects concurrent MC register access */
|
|
|
|
spinlock_t mc_idx_lock;
|
|
|
|
/* protects concurrent PCIE register access */
|
|
|
|
spinlock_t pcie_idx_lock;
|
|
|
|
/* protects concurrent PCIE_PORT register access */
|
|
|
|
spinlock_t pciep_idx_lock;
|
|
|
|
/* protects concurrent PIF register access */
|
|
|
|
spinlock_t pif_idx_lock;
|
|
|
|
/* protects concurrent CG register access */
|
|
|
|
spinlock_t cg_idx_lock;
|
|
|
|
/* protects concurrent UVD register access */
|
|
|
|
spinlock_t uvd_idx_lock;
|
|
|
|
/* protects concurrent RCU register access */
|
|
|
|
spinlock_t rcu_idx_lock;
|
|
|
|
/* protects concurrent DIDT register access */
|
|
|
|
spinlock_t didt_idx_lock;
|
|
|
|
/* protects concurrent ENDPOINT (audio) register access */
|
|
|
|
spinlock_t end_idx_lock;
|
2011-07-13 14:28:12 +08:00
|
|
|
void __iomem *rmmio;
|
2009-06-05 20:42:42 +08:00
|
|
|
radeon_rreg_t mc_rreg;
|
|
|
|
radeon_wreg_t mc_wreg;
|
|
|
|
radeon_rreg_t pll_rreg;
|
|
|
|
radeon_wreg_t pll_wreg;
|
2009-08-12 16:43:14 +08:00
|
|
|
uint32_t pcie_reg_mask;
|
2009-06-05 20:42:42 +08:00
|
|
|
radeon_rreg_t pciep_rreg;
|
|
|
|
radeon_wreg_t pciep_wreg;
|
2010-06-30 23:52:50 +08:00
|
|
|
/* io port */
|
|
|
|
void __iomem *rio_mem;
|
|
|
|
resource_size_t rio_mem_size;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_clock clock;
|
|
|
|
struct radeon_mc mc;
|
|
|
|
struct radeon_gart gart;
|
|
|
|
struct radeon_mode_info mode_info;
|
|
|
|
struct radeon_scratch scratch;
|
2013-03-05 01:47:46 +08:00
|
|
|
struct radeon_doorbell doorbell;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_mman mman;
|
2011-08-26 01:39:48 +08:00
|
|
|
struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS];
|
2012-05-09 21:34:55 +08:00
|
|
|
wait_queue_head_t fence_queue;
|
2016-06-01 21:10:02 +08:00
|
|
|
u64 fence_context;
|
2012-05-09 21:34:45 +08:00
|
|
|
struct mutex ring_lock;
|
2011-10-23 18:56:27 +08:00
|
|
|
struct radeon_ring ring[RADEON_NUM_RINGS];
|
2012-05-09 21:34:58 +08:00
|
|
|
bool ib_pool_ready;
|
|
|
|
struct radeon_sa_manager ring_tmp_bo;
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_irq irq;
|
|
|
|
struct radeon_asic *asic;
|
|
|
|
struct radeon_gem gem;
|
2009-07-14 03:04:08 +08:00
|
|
|
struct radeon_pm pm;
|
2013-04-08 18:41:29 +08:00
|
|
|
struct radeon_uvd uvd;
|
2013-05-23 18:10:04 +08:00
|
|
|
struct radeon_vce vce;
|
2009-09-15 10:21:01 +08:00
|
|
|
uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH];
|
2009-06-05 20:42:42 +08:00
|
|
|
struct radeon_wb wb;
|
2009-09-08 08:10:24 +08:00
|
|
|
struct radeon_dummy_page dummy_page;
|
2009-06-05 20:42:42 +08:00
|
|
|
bool shutdown;
|
2009-07-10 20:36:26 +08:00
|
|
|
bool need_dma32;
|
2009-09-16 21:24:21 +08:00
|
|
|
bool accel_working;
|
2013-04-09 05:25:47 +08:00
|
|
|
bool fastfb_working; /* IGP feature*/
|
2014-08-28 04:45:18 +08:00
|
|
|
bool needs_reset, in_reset;
|
2009-06-24 07:48:08 +08:00
|
|
|
struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
|
2009-09-08 08:10:24 +08:00
|
|
|
const struct firmware *me_fw; /* all family ME firmware */
|
|
|
|
const struct firmware *pfp_fw; /* r6/700 PFP firmware */
|
2009-12-02 02:43:46 +08:00
|
|
|
const struct firmware *rlc_fw; /* r6/700 RLC firmware */
|
2011-01-07 10:19:31 +08:00
|
|
|
const struct firmware *mc_fw; /* NI MC firmware */
|
2012-03-21 05:18:17 +08:00
|
|
|
const struct firmware *ce_fw; /* SI CE firmware */
|
2012-12-19 10:43:07 +08:00
|
|
|
const struct firmware *mec_fw; /* CIK MEC firmware */
|
2014-06-26 07:32:36 +08:00
|
|
|
const struct firmware *mec2_fw; /* KV MEC2 firmware */
|
2013-04-10 00:47:11 +08:00
|
|
|
const struct firmware *sdma_fw; /* CIK SDMA firmware */
|
2013-06-26 12:11:19 +08:00
|
|
|
const struct firmware *smc_fw; /* SMC firmware */
|
2013-08-05 20:10:55 +08:00
|
|
|
const struct firmware *uvd_fw; /* UVD firmware */
|
2013-05-23 18:10:04 +08:00
|
|
|
const struct firmware *vce_fw; /* VCE firmware */
|
2014-06-26 06:41:34 +08:00
|
|
|
bool new_fw;
|
2011-10-28 22:30:02 +08:00
|
|
|
struct r600_vram_scratch vram_scratch;
|
2009-10-17 00:21:24 +08:00
|
|
|
int msi_enabled; /* msi enabled */
|
2009-12-02 02:43:46 +08:00
|
|
|
struct r600_ih ih; /* r6/700 interrupt ring */
|
2013-04-13 01:52:52 +08:00
|
|
|
struct radeon_rlc rlc;
|
2013-06-27 05:37:11 +08:00
|
|
|
struct radeon_mec mec;
|
2015-12-04 07:26:07 +08:00
|
|
|
struct delayed_work hotplug_work;
|
2015-02-24 07:23:56 +08:00
|
|
|
struct work_struct dp_work;
|
2012-03-30 20:59:57 +08:00
|
|
|
struct work_struct audio_work;
|
2010-02-02 05:02:25 +08:00
|
|
|
int num_crtc; /* number of crtcs */
|
2009-12-23 16:23:21 +08:00
|
|
|
struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */
|
2013-05-15 00:08:35 +08:00
|
|
|
bool has_uvd;
|
2016-03-18 23:58:32 +08:00
|
|
|
bool has_vce;
|
2013-08-01 04:51:33 +08:00
|
|
|
struct r600_audio audio; /* audio stuff */
|
2010-05-08 03:10:16 +08:00
|
|
|
struct notifier_block acpi_nb;
|
2011-01-05 12:46:48 +08:00
|
|
|
/* only one userspace can use Hyperz features or CMASK at a time */
|
2010-07-13 09:11:11 +08:00
|
|
|
struct drm_file *hyperz_filp;
|
2011-01-05 12:46:48 +08:00
|
|
|
struct drm_file *cmask_filp;
|
2010-08-06 09:21:16 +08:00
|
|
|
/* i2c buses */
|
|
|
|
struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS];
|
2011-10-24 20:54:54 +08:00
|
|
|
/* debugfs */
|
|
|
|
struct radeon_debugfs debugfs[RADEON_DEBUGFS_MAX_COMPONENTS];
|
|
|
|
unsigned debugfs_count;
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementaly depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of lastest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/* virtual memory */
|
|
|
|
struct radeon_vm_manager vm_manager;
|
2012-08-09 22:34:17 +08:00
|
|
|
struct mutex gpu_clock_mutex;
|
2014-03-02 07:56:18 +08:00
|
|
|
/* memory stats */
|
|
|
|
atomic64_t vram_usage;
|
|
|
|
atomic64_t gtt_usage;
|
|
|
|
atomic64_t num_bytes_moved;
|
2015-04-30 01:40:33 +08:00
|
|
|
atomic_t gpu_reset_counter;
|
2012-08-16 23:11:18 +08:00
|
|
|
/* ACPI interface */
|
|
|
|
struct radeon_atif atif;
|
2012-08-16 23:13:43 +08:00
|
|
|
struct radeon_atcs atcs;
|
2013-08-07 00:40:16 +08:00
|
|
|
/* srbm instance registers */
|
|
|
|
struct mutex srbm_mutex;
|
2014-07-14 20:36:08 +08:00
|
|
|
/* GRBM index mutex. Protects concurrents access to GRBM index */
|
|
|
|
struct mutex grbm_idx_mutex;
|
2013-08-09 04:31:25 +08:00
|
|
|
/* clock, powergating flags */
|
|
|
|
u32 cg_flags;
|
|
|
|
u32 pg_flags;
|
2012-09-17 12:40:31 +08:00
|
|
|
|
|
|
|
struct dev_pm_domain vga_pm_domain;
|
|
|
|
bool have_disp_power_ref;
|
2014-07-18 23:54:20 +08:00
|
|
|
u32 px_quirk_flags;
|
2014-07-18 00:09:25 +08:00
|
|
|
|
|
|
|
/* tracking pinned memory */
|
|
|
|
u64 vram_pin_size;
|
|
|
|
u64 gart_pin_size;
|
2014-08-07 15:36:03 +08:00
|
|
|
|
2014-07-15 18:53:32 +08:00
|
|
|
/* amdkfd interface */
|
|
|
|
struct kfd_dev *kfd;
|
|
|
|
|
2014-08-07 15:36:03 +08:00
|
|
|
struct mutex mn_lock;
|
|
|
|
DECLARE_HASHTABLE(mn_hash, 7);
|
2009-06-05 20:42:42 +08:00
|
|
|
};
|
|
|
|
|
2014-04-11 10:29:01 +08:00
|
|
|
/* Device-level entry points; only declared in this header. */
bool radeon_is_px(struct drm_device *dev);
int radeon_device_init(struct radeon_device *rdev,
		       struct drm_device *ddev,
		       struct pci_dev *pdev,
		       uint32_t flags);
void radeon_device_fini(struct radeon_device *rdev);
int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
|
|
|
|
|
2014-04-21 01:29:33 +08:00
|
|
|
/*
 * Lower bound on the MMIO size (0x10000 bytes = 64 KiB). r100_mm_rreg() and
 * r100_mm_wreg() treat any register offset below this value as directly
 * accessible, in addition to offsets below rdev->rmmio_size.
 */
#define RADEON_MIN_MMIO_SIZE 0x10000
|
|
|
|
|
radeon: Deinline indirect register accessor functions
This patch deinlines indirect register accessor functions.
These functions perform two mmio accesses, framed by spin lock/unlock.
Spin lock/unlock by itself takes more than 50 cycles in ideal case
(if lock is exclusively cached on current CPU).
With this .config: http://busybox.net/~vda/kernel_config,
after uninlining these functions have sizes and callsite counts
as follows:
r600_uvd_ctx_rreg: 111 bytes, 4 callsites
r600_uvd_ctx_wreg: 113 bytes, 5 callsites
eg_pif_phy0_rreg: 106 bytes, 13 callsites
eg_pif_phy0_wreg: 108 bytes, 13 callsites
eg_pif_phy1_rreg: 107 bytes, 13 callsites
eg_pif_phy1_wreg: 108 bytes, 13 callsites
rv370_pcie_rreg: 111 bytes, 21 callsites
rv370_pcie_wreg: 113 bytes, 24 callsites
r600_rcu_rreg: 111 bytes, 16 callsites
r600_rcu_wreg: 113 bytes, 25 callsites
cik_didt_rreg: 106 bytes, 10 callsites
cik_didt_wreg: 107 bytes, 10 callsites
tn_smc_rreg: 106 bytes, 126 callsites
tn_smc_wreg: 107 bytes, 116 callsites
eg_cg_rreg: 107 bytes, 20 callsites
eg_cg_wreg: 108 bytes, 52 callsites
Functions r100_mm_rreg() and r100_mm_rreg() have a fast path and
a locked (slow) path. This patch deinlines only slow path.
r100_mm_rreg_slow: 78 bytes, 2083 callsites
r100_mm_wreg_slow: 81 bytes, 3570 callsites
Reduction in code size is more than 65,000 bytes:
text data bss dec hex filename
85740176 22294680 20627456 128662312 7ab3b28 vmlinux.before
85674192 22294776 20627456 128598664 7aa4288 vmlinux
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2015-05-20 19:02:37 +08:00
|
|
|
/*
 * Out-of-line slow paths called by the inline r100_mm_rreg() and
 * r100_mm_wreg() below when the direct readl()/writel() path is not taken.
 */
uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg);
void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v);
|
2014-04-21 01:29:33 +08:00
|
|
|
static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
|
|
|
|
bool always_indirect)
|
|
|
|
{
|
|
|
|
/* The mmio size is 64kb at minimum. Allows the if to be optimized out. */
|
|
|
|
if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
|
|
|
|
return readl(((void __iomem *)rdev->rmmio) + reg);
|
radeon: Deinline indirect register accessor functions
This patch deinlines indirect register accessor functions.
These functions perform two mmio accesses, framed by spin lock/unlock.
Spin lock/unlock by itself takes more than 50 cycles in ideal case
(if lock is exclusively cached on current CPU).
With this .config: http://busybox.net/~vda/kernel_config,
after uninlining these functions have sizes and callsite counts
as follows:
r600_uvd_ctx_rreg: 111 bytes, 4 callsites
r600_uvd_ctx_wreg: 113 bytes, 5 callsites
eg_pif_phy0_rreg: 106 bytes, 13 callsites
eg_pif_phy0_wreg: 108 bytes, 13 callsites
eg_pif_phy1_rreg: 107 bytes, 13 callsites
eg_pif_phy1_wreg: 108 bytes, 13 callsites
rv370_pcie_rreg: 111 bytes, 21 callsites
rv370_pcie_wreg: 113 bytes, 24 callsites
r600_rcu_rreg: 111 bytes, 16 callsites
r600_rcu_wreg: 113 bytes, 25 callsites
cik_didt_rreg: 106 bytes, 10 callsites
cik_didt_wreg: 107 bytes, 10 callsites
tn_smc_rreg: 106 bytes, 126 callsites
tn_smc_wreg: 107 bytes, 116 callsites
eg_cg_rreg: 107 bytes, 20 callsites
eg_cg_wreg: 108 bytes, 52 callsites
Functions r100_mm_rreg() and r100_mm_rreg() have a fast path and
a locked (slow) path. This patch deinlines only slow path.
r100_mm_rreg_slow: 78 bytes, 2083 callsites
r100_mm_wreg_slow: 81 bytes, 3570 callsites
Reduction in code size is more than 65,000 bytes:
text data bss dec hex filename
85740176 22294680 20627456 128662312 7ab3b28 vmlinux.before
85674192 22294776 20627456 128598664 7aa4288 vmlinux
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2015-05-20 19:02:37 +08:00
|
|
|
else
|
|
|
|
return r100_mm_rreg_slow(rdev, reg);
|
2014-04-21 01:29:33 +08:00
|
|
|
}
|
|
|
|
static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
|
|
|
|
bool always_indirect)
|
|
|
|
{
|
|
|
|
if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
|
|
|
|
writel(v, ((void __iomem *)rdev->rmmio) + reg);
|
radeon: Deinline indirect register accessor functions
This patch deinlines indirect register accessor functions.
These functions perform two mmio accesses, framed by spin lock/unlock.
Spin lock/unlock by itself takes more than 50 cycles in ideal case
(if lock is exclusively cached on current CPU).
With this .config: http://busybox.net/~vda/kernel_config,
after uninlining these functions have sizes and callsite counts
as follows:
r600_uvd_ctx_rreg: 111 bytes, 4 callsites
r600_uvd_ctx_wreg: 113 bytes, 5 callsites
eg_pif_phy0_rreg: 106 bytes, 13 callsites
eg_pif_phy0_wreg: 108 bytes, 13 callsites
eg_pif_phy1_rreg: 107 bytes, 13 callsites
eg_pif_phy1_wreg: 108 bytes, 13 callsites
rv370_pcie_rreg: 111 bytes, 21 callsites
rv370_pcie_wreg: 113 bytes, 24 callsites
r600_rcu_rreg: 111 bytes, 16 callsites
r600_rcu_wreg: 113 bytes, 25 callsites
cik_didt_rreg: 106 bytes, 10 callsites
cik_didt_wreg: 107 bytes, 10 callsites
tn_smc_rreg: 106 bytes, 126 callsites
tn_smc_wreg: 107 bytes, 116 callsites
eg_cg_rreg: 107 bytes, 20 callsites
eg_cg_wreg: 108 bytes, 52 callsites
Functions r100_mm_rreg() and r100_mm_rreg() have a fast path and
a locked (slow) path. This patch deinlines only slow path.
r100_mm_rreg_slow: 78 bytes, 2083 callsites
r100_mm_wreg_slow: 81 bytes, 3570 callsites
Reduction in code size is more than 65,000 bytes:
text data bss dec hex filename
85740176 22294680 20627456 128662312 7ab3b28 vmlinux.before
85674192 22294776 20627456 128598664 7aa4288 vmlinux
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2015-05-20 19:02:37 +08:00
|
|
|
else
|
|
|
|
r100_mm_wreg_slow(rdev, reg, v);
|
2014-04-21 01:29:33 +08:00
|
|
|
}
|
|
|
|
|
2011-10-14 07:08:42 +08:00
|
|
|
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
|
|
|
|
void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
|
2010-06-30 23:52:50 +08:00
|
|
|
|
2013-11-14 04:54:17 +08:00
|
|
|
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index);
|
|
|
|
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v);
|
2013-03-05 01:47:46 +08:00
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
/*
|
|
|
|
* Cast helper
|
|
|
|
*/
|
2016-10-25 20:00:45 +08:00
|
|
|
extern const struct dma_fence_ops radeon_fence_ops;
|
2014-01-09 18:03:12 +08:00
|
|
|
|
2016-10-25 20:00:45 +08:00
|
|
|
static inline struct radeon_fence *to_radeon_fence(struct dma_fence *f)
|
2014-01-09 18:03:12 +08:00
|
|
|
{
|
|
|
|
struct radeon_fence *__f = container_of(f, struct radeon_fence, base);
|
|
|
|
|
|
|
|
if (__f->base.ops == &radeon_fence_ops)
|
|
|
|
return __f;
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
2009-06-05 20:42:42 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Registers read & write functions.
|
|
|
|
*/
|
2011-07-13 14:28:12 +08:00
|
|
|
#define RREG8(reg) readb((rdev->rmmio) + (reg))
|
|
|
|
#define WREG8(reg, v) writeb(v, (rdev->rmmio) + (reg))
|
|
|
|
#define RREG16(reg) readw((rdev->rmmio) + (reg))
|
|
|
|
#define WREG16(reg, v) writew(v, (rdev->rmmio) + (reg))
|
2012-12-02 21:02:51 +08:00
|
|
|
#define RREG32(reg) r100_mm_rreg(rdev, (reg), false)
|
|
|
|
#define RREG32_IDX(reg) r100_mm_rreg(rdev, (reg), true)
|
|
|
|
#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg), false))
|
|
|
|
#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v), false)
|
|
|
|
#define WREG32_IDX(reg, v) r100_mm_wreg(rdev, (reg), (v), true)
|
2009-06-05 20:42:42 +08:00
|
|
|
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
|
|
|
|
/* Extract FIELD from register value v: mask first, then shift down (inverse of REG_SET). */
#define REG_GET(FIELD, v) (((v) & FIELD##_MASK) >> FIELD##_SHIFT)
|
|
|
|
#define RREG32_PLL(reg) rdev->pll_rreg(rdev, (reg))
|
|
|
|
#define WREG32_PLL(reg, v) rdev->pll_wreg(rdev, (reg), (v))
|
|
|
|
#define RREG32_MC(reg) rdev->mc_rreg(rdev, (reg))
|
|
|
|
#define WREG32_MC(reg, v) rdev->mc_wreg(rdev, (reg), (v))
|
2009-08-12 16:43:14 +08:00
|
|
|
#define RREG32_PCIE(reg) rv370_pcie_rreg(rdev, (reg))
|
|
|
|
#define WREG32_PCIE(reg, v) rv370_pcie_wreg(rdev, (reg), (v))
|
2012-10-26 04:06:59 +08:00
|
|
|
#define RREG32_PCIE_PORT(reg) rdev->pciep_rreg(rdev, (reg))
|
|
|
|
#define WREG32_PCIE_PORT(reg, v) rdev->pciep_wreg(rdev, (reg), (v))
|
2012-04-21 00:39:49 +08:00
|
|
|
#define RREG32_SMC(reg) tn_smc_rreg(rdev, (reg))
|
|
|
|
#define WREG32_SMC(reg, v) tn_smc_wreg(rdev, (reg), (v))
|
2013-04-12 23:27:20 +08:00
|
|
|
#define RREG32_RCU(reg) r600_rcu_rreg(rdev, (reg))
|
|
|
|
#define WREG32_RCU(reg, v) r600_rcu_wreg(rdev, (reg), (v))
|
2013-04-12 23:49:51 +08:00
|
|
|
#define RREG32_CG(reg) eg_cg_rreg(rdev, (reg))
|
|
|
|
#define WREG32_CG(reg, v) eg_cg_wreg(rdev, (reg), (v))
|
2013-02-15 07:18:12 +08:00
|
|
|
#define RREG32_PIF_PHY0(reg) eg_pif_phy0_rreg(rdev, (reg))
|
|
|
|
#define WREG32_PIF_PHY0(reg, v) eg_pif_phy0_wreg(rdev, (reg), (v))
|
|
|
|
#define RREG32_PIF_PHY1(reg) eg_pif_phy1_rreg(rdev, (reg))
|
|
|
|
#define WREG32_PIF_PHY1(reg, v) eg_pif_phy1_wreg(rdev, (reg), (v))
|
2013-02-26 04:18:39 +08:00
|
|
|
#define RREG32_UVD_CTX(reg) r600_uvd_ctx_rreg(rdev, (reg))
|
|
|
|
#define WREG32_UVD_CTX(reg, v) r600_uvd_ctx_wreg(rdev, (reg), (v))
|
2013-04-20 01:03:37 +08:00
|
|
|
#define RREG32_DIDT(reg) cik_didt_rreg(rdev, (reg))
|
|
|
|
#define WREG32_DIDT(reg, v) cik_didt_wreg(rdev, (reg), (v))
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Read-modify-write through RREG32/WREG32: bits set in @mask are kept from
 * the current register value, all other bits are taken from @val.
 * Note that @reg and @mask are each expanded twice.
 */
#define WREG32_P(reg, val, mask)				\
	do {							\
		uint32_t tmp_ = RREG32(reg);			\
		tmp_ &= (mask);					\
		tmp_ |= ((val) & ~(mask));			\
		WREG32(reg, tmp_);				\
	} while (0)
|
2013-04-14 07:26:19 +08:00
|
|
|
#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
|
2013-08-16 00:55:22 +08:00
|
|
|
#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
 * Read-modify-write through RREG32_PLL/WREG32_PLL: bits set in @mask are
 * kept from the current value, all other bits are taken from @val.
 * Note that @reg and @mask are each expanded twice.
 */
#define WREG32_PLL_P(reg, val, mask)				\
	do {							\
		uint32_t tmp_ = RREG32_PLL(reg);		\
		tmp_ &= (mask);					\
		tmp_ |= ((val) & ~(mask));			\
		WREG32_PLL(reg, tmp_);				\
	} while (0)
|
2015-05-12 04:01:49 +08:00
|
|
|
/*
 * Read-modify-write through RREG32_SMC/WREG32_SMC: bits set in @mask are
 * kept from the current value, all other bits are taken from @val.
 * Note that @reg and @mask are each expanded twice.
 */
#define WREG32_SMC_P(reg, val, mask)				\
	do {							\
		uint32_t tmp_ = RREG32_SMC(reg);		\
		tmp_ &= (mask);					\
		tmp_ |= ((val) & ~(mask));			\
		WREG32_SMC(reg, tmp_);				\
	} while (0)
|
2012-12-02 21:02:51 +08:00
|
|
|
#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg), false))
|
2010-06-30 23:52:50 +08:00
|
|
|
#define RREG32_IO(reg) r100_io_rreg(rdev, (reg))
|
|
|
|
#define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v))
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2013-11-14 04:54:17 +08:00
|
|
|
#define RDOORBELL32(index) cik_mm_rdoorbell(rdev, (index))
|
|
|
|
#define WDOORBELL32(index, v) cik_mm_wdoorbell(rdev, (index), (v))
|
2013-03-05 01:47:46 +08:00
|
|
|
|
2009-08-12 16:43:14 +08:00
|
|
|
/*
|
radeon: Deinline indirect register accessor functions
This patch deinlines indirect register accessor functions.
These functions perform two mmio accesses, framed by spin lock/unlock.
Spin lock/unlock by itself takes more than 50 cycles in ideal case
(if lock is exclusively cached on current CPU).
With this .config: http://busybox.net/~vda/kernel_config,
after uninlining these functions have sizes and callsite counts
as follows:
r600_uvd_ctx_rreg: 111 bytes, 4 callsites
r600_uvd_ctx_wreg: 113 bytes, 5 callsites
eg_pif_phy0_rreg: 106 bytes, 13 callsites
eg_pif_phy0_wreg: 108 bytes, 13 callsites
eg_pif_phy1_rreg: 107 bytes, 13 callsites
eg_pif_phy1_wreg: 108 bytes, 13 callsites
rv370_pcie_rreg: 111 bytes, 21 callsites
rv370_pcie_wreg: 113 bytes, 24 callsites
r600_rcu_rreg: 111 bytes, 16 callsites
r600_rcu_wreg: 113 bytes, 25 callsites
cik_didt_rreg: 106 bytes, 10 callsites
cik_didt_wreg: 107 bytes, 10 callsites
tn_smc_rreg: 106 bytes, 126 callsites
tn_smc_wreg: 107 bytes, 116 callsites
eg_cg_rreg: 107 bytes, 20 callsites
eg_cg_wreg: 108 bytes, 52 callsites
Functions r100_mm_rreg() and r100_mm_wreg() have a fast path and
a locked (slow) path. This patch deinlines only slow path.
r100_mm_rreg_slow: 78 bytes, 2083 callsites
r100_mm_wreg_slow: 81 bytes, 3570 callsites
Reduction in code size is more than 65,000 bytes:
text data bss dec hex filename
85740176 22294680 20627456 128662312 7ab3b28 vmlinux.before
85674192 22294776 20627456 128598664 7aa4288 vmlinux
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2015-05-20 19:02:37 +08:00
|
|
|
* Indirect registers accessors.
|
|
|
|
* They used to be inlined, but this increases code size by ~65 kbytes.
|
|
|
|
* Since each performs a pair of MMIO ops
|
|
|
|
* within a spin_lock_irqsave/spin_unlock_irqrestore region,
|
|
|
|
* the cost of call+ret is almost negligible. MMIO and locking
|
|
|
|
* cost several dozen cycles each at best, while call+ret is ~5 cycles.
|
2009-08-12 16:43:14 +08:00
|
|
|
*/
|
radeon: Deinline indirect register accessor functions
This patch deinlines indirect register accessor functions.
These functions perform two mmio accesses, framed by spin lock/unlock.
Spin lock/unlock by itself takes more than 50 cycles in ideal case
(if lock is exclusively cached on current CPU).
With this .config: http://busybox.net/~vda/kernel_config,
after uninlining these functions have sizes and callsite counts
as follows:
r600_uvd_ctx_rreg: 111 bytes, 4 callsites
r600_uvd_ctx_wreg: 113 bytes, 5 callsites
eg_pif_phy0_rreg: 106 bytes, 13 callsites
eg_pif_phy0_wreg: 108 bytes, 13 callsites
eg_pif_phy1_rreg: 107 bytes, 13 callsites
eg_pif_phy1_wreg: 108 bytes, 13 callsites
rv370_pcie_rreg: 111 bytes, 21 callsites
rv370_pcie_wreg: 113 bytes, 24 callsites
r600_rcu_rreg: 111 bytes, 16 callsites
r600_rcu_wreg: 113 bytes, 25 callsites
cik_didt_rreg: 106 bytes, 10 callsites
cik_didt_wreg: 107 bytes, 10 callsites
tn_smc_rreg: 106 bytes, 126 callsites
tn_smc_wreg: 107 bytes, 116 callsites
eg_cg_rreg: 107 bytes, 20 callsites
eg_cg_wreg: 108 bytes, 52 callsites
Functions r100_mm_rreg() and r100_mm_wreg() have a fast path and
a locked (slow) path. This patch deinlines only slow path.
r100_mm_rreg_slow: 78 bytes, 2083 callsites
r100_mm_wreg_slow: 81 bytes, 3570 callsites
Reduction in code size is more than 65,000 bytes:
text data bss dec hex filename
85740176 22294680 20627456 128662312 7ab3b28 vmlinux.before
85674192 22294776 20627456 128598664 7aa4288 vmlinux
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2015-05-20 19:02:37 +08:00
|
|
|
uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg);
|
|
|
|
void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
|
|
|
|
u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg);
|
|
|
|
void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v);
|
|
|
|
u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg);
|
|
|
|
void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v);
|
|
|
|
u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg);
|
|
|
|
void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v);
|
|
|
|
u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg);
|
|
|
|
void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v);
|
|
|
|
u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg);
|
|
|
|
void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v);
|
|
|
|
u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg);
|
|
|
|
void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v);
|
|
|
|
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg);
|
|
|
|
void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v);
|
2013-04-20 01:03:37 +08:00
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
void r100_pll_errata_after_index(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ASICs helpers.
|
|
|
|
*/
|
2009-07-14 00:02:32 +08:00
|
|
|
/* True when rdev->pdev->device is 0x515e or 0x5969. */
#define ASIC_IS_RN50(rdev) (((rdev)->pdev->device == 0x515e) ||	\
			    ((rdev)->pdev->device == 0x5969))
|
2009-06-05 20:42:42 +08:00
|
|
|
/* True when rdev->family is any of the chips listed below. */
#define ASIC_IS_RV100(rdev) (((rdev)->family == CHIP_RV100) ||	\
			     ((rdev)->family == CHIP_RV200) ||	\
			     ((rdev)->family == CHIP_RS100) ||	\
			     ((rdev)->family == CHIP_RS200) ||	\
			     ((rdev)->family == CHIP_RV250) ||	\
			     ((rdev)->family == CHIP_RV280) ||	\
			     ((rdev)->family == CHIP_RS300))
|
|
|
|
/* True when rdev->family is any of the chips listed below. */
#define ASIC_IS_R300(rdev) (((rdev)->family == CHIP_R300) ||	\
			    ((rdev)->family == CHIP_RV350) ||	\
			    ((rdev)->family == CHIP_R350) ||	\
			    ((rdev)->family == CHIP_RV380) ||	\
			    ((rdev)->family == CHIP_R420) ||	\
			    ((rdev)->family == CHIP_R423) ||	\
			    ((rdev)->family == CHIP_RV410) ||	\
			    ((rdev)->family == CHIP_RS400) ||	\
			    ((rdev)->family == CHIP_RS480))
|
2011-01-07 07:49:34 +08:00
|
|
|
/* True when rdev->ddev->pdev->device is one of the device IDs listed below. */
#define ASIC_IS_X2(rdev) (((rdev)->ddev->pdev->device == 0x9441) ||	\
			  ((rdev)->ddev->pdev->device == 0x9443) ||	\
			  ((rdev)->ddev->pdev->device == 0x944B) ||	\
			  ((rdev)->ddev->pdev->device == 0x9506) ||	\
			  ((rdev)->ddev->pdev->device == 0x9509) ||	\
			  ((rdev)->ddev->pdev->device == 0x950F) ||	\
			  ((rdev)->ddev->pdev->device == 0x689C) ||	\
			  ((rdev)->ddev->pdev->device == 0x689D))
|
2009-06-05 20:42:42 +08:00
|
|
|
#define ASIC_IS_AVIVO(rdev) ((rdev->family >= CHIP_RS600))
|
2010-11-17 01:09:41 +08:00
|
|
|
/* True for CHIP_RS600, CHIP_RS690, CHIP_RS740 and any family >= CHIP_R600. */
#define ASIC_IS_DCE2(rdev) (((rdev)->family == CHIP_RS600) ||	\
			    ((rdev)->family == CHIP_RS690) ||	\
			    ((rdev)->family == CHIP_RS740) ||	\
			    ((rdev)->family >= CHIP_R600))
|
2009-06-05 20:42:42 +08:00
|
|
|
#define ASIC_IS_DCE3(rdev) ((rdev->family >= CHIP_RV620))
|
|
|
|
#define ASIC_IS_DCE32(rdev) ((rdev->family >= CHIP_RV730))
|
2010-01-13 06:54:34 +08:00
|
|
|
#define ASIC_IS_DCE4(rdev) ((rdev->family >= CHIP_CEDAR))
|
2011-01-07 10:19:11 +08:00
|
|
|
/* True when family >= CHIP_PALM and the RADEON_IS_IGP flag is set. */
#define ASIC_IS_DCE41(rdev) (((rdev)->family >= CHIP_PALM) &&	\
			     ((rdev)->flags & RADEON_IS_IGP))
|
2011-01-07 10:19:12 +08:00
|
|
|
#define ASIC_IS_DCE5(rdev) ((rdev->family >= CHIP_BARTS))
|
2012-03-21 05:18:28 +08:00
|
|
|
#define ASIC_IS_DCE6(rdev) ((rdev->family >= CHIP_ARUBA))
|
|
|
|
/* True when family >= CHIP_ARUBA and the RADEON_IS_IGP flag is set. */
#define ASIC_IS_DCE61(rdev) (((rdev)->family >= CHIP_ARUBA) &&	\
			     ((rdev)->flags & RADEON_IS_IGP))
|
2012-12-19 06:01:35 +08:00
|
|
|
#define ASIC_IS_DCE64(rdev) ((rdev->family == CHIP_OLAND))
|
2012-07-27 06:53:55 +08:00
|
|
|
#define ASIC_IS_NODCE(rdev) ((rdev->family == CHIP_HAINAN))
|
2013-06-07 23:37:11 +08:00
|
|
|
#define ASIC_IS_DCE8(rdev) ((rdev->family >= CHIP_BONAIRE))
|
2014-04-08 23:28:54 +08:00
|
|
|
#define ASIC_IS_DCE81(rdev) ((rdev->family == CHIP_KAVERI))
|
|
|
|
#define ASIC_IS_DCE82(rdev) ((rdev->family == CHIP_BONAIRE))
|
2014-05-09 06:26:23 +08:00
|
|
|
/* True when rdev->family is CHIP_KABINI or CHIP_MULLINS. */
#define ASIC_IS_DCE83(rdev) (((rdev)->family == CHIP_KABINI) ||	\
			     ((rdev)->family == CHIP_MULLINS))
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2013-06-26 12:33:35 +08:00
|
|
|
/* True when rdev->ddev->pdev->device is one of the device IDs listed below. */
#define ASIC_IS_LOMBOK(rdev) (((rdev)->ddev->pdev->device == 0x6849) ||	\
			      ((rdev)->ddev->pdev->device == 0x6850) ||	\
			      ((rdev)->ddev->pdev->device == 0x6858) ||	\
			      ((rdev)->ddev->pdev->device == 0x6859) ||	\
			      ((rdev)->ddev->pdev->device == 0x6840) ||	\
			      ((rdev)->ddev->pdev->device == 0x6841) ||	\
			      ((rdev)->ddev->pdev->device == 0x6842) ||	\
			      ((rdev)->ddev->pdev->device == 0x6843))
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
/*
|
|
|
|
* BIOS helpers.
|
|
|
|
*/
|
|
|
|
#define RBIOS8(i) (rdev->bios[i])
|
|
|
|
#define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8))
|
|
|
|
#define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16))
|
|
|
|
|
|
|
|
int radeon_combios_init(struct radeon_device *rdev);
|
|
|
|
void radeon_combios_fini(struct radeon_device *rdev);
|
|
|
|
int radeon_atombios_init(struct radeon_device *rdev);
|
|
|
|
void radeon_atombios_fini(struct radeon_device *rdev);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* RING helpers.
|
|
|
|
*/
|
2014-08-29 18:12:38 +08:00
|
|
|
|
|
|
|
/**
|
|
|
|
* radeon_ring_write - write a value to the ring
|
|
|
|
*
|
|
|
|
* @ring: radeon_ring structure holding ring information
|
|
|
|
* @v: dword (dw) value to write
|
|
|
|
*
|
|
|
|
* Write a value to the requested ring buffer (all asics).
|
|
|
|
*/
|
2011-10-23 18:56:27 +08:00
|
|
|
static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
|
2009-06-05 20:42:42 +08:00
|
|
|
{
|
2014-08-29 18:12:38 +08:00
|
|
|
if (ring->count_dw <= 0)
|
|
|
|
DRM_ERROR("radeon: writing more dwords to the ring than expected!\n");
|
|
|
|
|
2011-10-23 18:56:27 +08:00
|
|
|
ring->ring[ring->wptr++] = v;
|
|
|
|
ring->wptr &= ring->ptr_mask;
|
|
|
|
ring->count_dw--;
|
|
|
|
ring->ring_free_dw--;
|
2009-06-05 20:42:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ASICs macro.
|
|
|
|
*/
|
2009-06-17 19:28:30 +08:00
|
|
|
#define radeon_init(rdev) (rdev)->asic->init((rdev))
|
2009-09-08 08:10:24 +08:00
|
|
|
#define radeon_fini(rdev) (rdev)->asic->fini((rdev))
|
|
|
|
#define radeon_resume(rdev) (rdev)->asic->resume((rdev))
|
|
|
|
#define radeon_suspend(rdev) (rdev)->asic->suspend((rdev))
|
2013-08-13 17:56:50 +08:00
|
|
|
#define radeon_cs_parse(rdev, r, p) (rdev)->asic->ring[(r)]->cs_parse((p))
|
2009-09-21 12:33:58 +08:00
|
|
|
#define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state))
|
2016-03-18 23:58:38 +08:00
|
|
|
#define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev), false)
|
2012-02-24 06:53:46 +08:00
|
|
|
#define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev))
|
2015-01-21 16:36:35 +08:00
|
|
|
#define radeon_gart_get_page_entry(a, f) (rdev)->asic->gart.get_page_entry((a), (f))
|
|
|
|
#define radeon_gart_set_page(rdev, i, e) (rdev)->asic->gart.set_page((rdev), (i), (e))
|
2012-08-07 02:21:10 +08:00
|
|
|
#define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
|
|
|
|
#define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
|
2014-07-31 03:05:17 +08:00
|
|
|
#define radeon_asic_vm_copy_pages(rdev, ib, pe, src, count) ((rdev)->asic->vm.copy_pages((rdev), (ib), (pe), (src), (count)))
|
|
|
|
#define radeon_asic_vm_write_pages(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.write_pages((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
|
|
|
|
#define radeon_asic_vm_set_pages(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_pages((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
|
|
|
|
#define radeon_asic_vm_pad_ib(rdev, ib) ((rdev)->asic->vm.pad_ib((ib)))
|
2013-08-13 17:56:50 +08:00
|
|
|
#define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)]->ring_start((rdev), (cp))
|
|
|
|
#define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)]->ring_test((rdev), (cp))
|
|
|
|
#define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)]->ib_test((rdev), (cp))
|
|
|
|
#define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_execute((rdev), (ib))
|
|
|
|
#define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_parse((rdev), (ib))
|
|
|
|
#define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)]->is_lockup((rdev), (cp))
|
2014-11-19 21:01:19 +08:00
|
|
|
#define radeon_ring_vm_flush(rdev, r, vm_id, pd_addr) (rdev)->asic->ring[(r)->idx]->vm_flush((rdev), (r), (vm_id), (pd_addr))
|
2013-08-13 17:56:50 +08:00
|
|
|
#define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_rptr((rdev), (r))
|
|
|
|
#define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_wptr((rdev), (r))
|
|
|
|
#define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->set_wptr((rdev), (r))
|
2012-02-24 06:53:43 +08:00
|
|
|
#define radeon_irq_set(rdev) (rdev)->asic->irq.set((rdev))
|
|
|
|
#define radeon_irq_process(rdev) (rdev)->asic->irq.process((rdev))
|
2012-02-24 06:53:47 +08:00
|
|
|
#define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->display.get_vblank_counter((rdev), (crtc))
|
2012-08-03 23:39:43 +08:00
|
|
|
#define radeon_set_backlight_level(rdev, e, l) (rdev)->asic->display.set_backlight_level((e), (l))
|
2012-09-14 21:59:26 +08:00
|
|
|
#define radeon_get_backlight_level(rdev, e) (rdev)->asic->display.get_backlight_level((e))
|
2013-04-18 23:32:16 +08:00
|
|
|
#define radeon_hdmi_enable(rdev, e, b) (rdev)->asic->display.hdmi_enable((e), (b))
|
|
|
|
#define radeon_hdmi_setmode(rdev, e, m) (rdev)->asic->display.hdmi_setmode((e), (m))
|
2013-08-13 17:56:50 +08:00
|
|
|
#define radeon_fence_ring_emit(rdev, r, fence) (rdev)->asic->ring[(r)]->emit_fence((rdev), (fence))
|
|
|
|
#define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) (rdev)->asic->ring[(r)]->emit_semaphore((rdev), (cp), (semaphore), (emit_wait))
|
2014-09-05 02:01:53 +08:00
|
|
|
#define radeon_copy_blit(rdev, s, d, np, resv) (rdev)->asic->copy.blit((rdev), (s), (d), (np), (resv))
|
|
|
|
#define radeon_copy_dma(rdev, s, d, np, resv) (rdev)->asic->copy.dma((rdev), (s), (d), (np), (resv))
|
|
|
|
#define radeon_copy(rdev, s, d, np, resv) (rdev)->asic->copy.copy((rdev), (s), (d), (np), (resv))
|
2012-02-24 06:53:42 +08:00
|
|
|
#define radeon_copy_blit_ring_index(rdev) (rdev)->asic->copy.blit_ring_index
|
|
|
|
#define radeon_copy_dma_ring_index(rdev) (rdev)->asic->copy.dma_ring_index
|
|
|
|
#define radeon_copy_ring_index(rdev) (rdev)->asic->copy.copy_ring_index
|
2012-02-24 06:53:48 +08:00
|
|
|
#define radeon_get_engine_clock(rdev) (rdev)->asic->pm.get_engine_clock((rdev))
|
|
|
|
#define radeon_set_engine_clock(rdev, e) (rdev)->asic->pm.set_engine_clock((rdev), (e))
|
|
|
|
#define radeon_get_memory_clock(rdev) (rdev)->asic->pm.get_memory_clock((rdev))
|
|
|
|
#define radeon_set_memory_clock(rdev, e) (rdev)->asic->pm.set_memory_clock((rdev), (e))
|
|
|
|
#define radeon_get_pcie_lanes(rdev) (rdev)->asic->pm.get_pcie_lanes((rdev))
|
|
|
|
#define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->pm.set_pcie_lanes((rdev), (l))
|
|
|
|
#define radeon_set_clock_gating(rdev, e) (rdev)->asic->pm.set_clock_gating((rdev), (e))
|
2013-04-08 18:41:30 +08:00
|
|
|
#define radeon_set_uvd_clocks(rdev, v, d) (rdev)->asic->pm.set_uvd_clocks((rdev), (v), (d))
|
2013-08-21 08:01:18 +08:00
|
|
|
#define radeon_set_vce_clocks(rdev, ev, ec) (rdev)->asic->pm.set_vce_clocks((rdev), (ev), (ec))
|
2013-06-22 02:38:03 +08:00
|
|
|
#define radeon_get_temperature(rdev) (rdev)->asic->pm.get_temperature((rdev))
|
2012-02-24 06:53:49 +08:00
|
|
|
#define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->surface.set_reg((rdev), (r), (f), (p), (o), (s)))
|
|
|
|
#define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->surface.clear_reg((rdev), (r)))
|
2012-02-24 06:53:47 +08:00
|
|
|
#define radeon_bandwidth_update(rdev) (rdev)->asic->display.bandwidth_update((rdev))
|
2012-02-24 06:53:39 +08:00
|
|
|
#define radeon_hpd_init(rdev) (rdev)->asic->hpd.init((rdev))
|
|
|
|
#define radeon_hpd_fini(rdev) (rdev)->asic->hpd.fini((rdev))
|
|
|
|
#define radeon_hpd_sense(rdev, h) (rdev)->asic->hpd.sense((rdev), (h))
|
|
|
|
#define radeon_hpd_set_polarity(rdev, h) (rdev)->asic->hpd.set_polarity((rdev), (h))
|
2010-04-23 00:39:58 +08:00
|
|
|
#define radeon_gui_idle(rdev) (rdev)->asic->gui_idle((rdev))
|
2012-02-24 06:53:41 +08:00
|
|
|
#define radeon_pm_misc(rdev) (rdev)->asic->pm.misc((rdev))
|
|
|
|
#define radeon_pm_prepare(rdev) (rdev)->asic->pm.prepare((rdev))
|
|
|
|
#define radeon_pm_finish(rdev) (rdev)->asic->pm.finish((rdev))
|
|
|
|
#define radeon_pm_init_profile(rdev) (rdev)->asic->pm.init_profile((rdev))
|
|
|
|
#define radeon_pm_get_dynpm_state(rdev) (rdev)->asic->pm.get_dynpm_state((rdev))
|
2016-04-01 17:51:34 +08:00
|
|
|
#define radeon_page_flip(rdev, crtc, base, async) (rdev)->asic->pflip.page_flip((rdev), (crtc), (base), (async))
|
2014-05-27 22:49:20 +08:00
|
|
|
#define radeon_page_flip_pending(rdev, crtc) (rdev)->asic->pflip.page_flip_pending((rdev), (crtc))
|
2012-08-03 23:50:54 +08:00
|
|
|
#define radeon_wait_for_vblank(rdev, crtc) (rdev)->asic->display.wait_for_vblank((rdev), (crtc))
|
|
|
|
#define radeon_mc_wait_for_idle(rdev) (rdev)->asic->mc_wait_for_idle((rdev))
|
2013-02-14 23:04:02 +08:00
|
|
|
#define radeon_get_xclk(rdev) (rdev)->asic->get_xclk((rdev))
|
2013-01-24 23:35:23 +08:00
|
|
|
#define radeon_get_gpu_clock_counter(rdev) (rdev)->asic->get_gpu_clock_counter((rdev))
|
2014-10-01 21:17:12 +08:00
|
|
|
#define radeon_get_allowed_info_register(rdev, r, v) (rdev)->asic->get_allowed_info_register((rdev), (r), (v))
|
2013-04-13 01:55:22 +08:00
|
|
|
/*
 * Wrappers around the rdev->asic->dpm callbacks. The rdev argument is
 * parenthesized so that expressions such as &dev can be passed safely.
 */
#define radeon_dpm_init(rdev) (rdev)->asic->dpm.init((rdev))
#define radeon_dpm_setup_asic(rdev) (rdev)->asic->dpm.setup_asic((rdev))
#define radeon_dpm_enable(rdev) (rdev)->asic->dpm.enable((rdev))
#define radeon_dpm_late_enable(rdev) (rdev)->asic->dpm.late_enable((rdev))
#define radeon_dpm_disable(rdev) (rdev)->asic->dpm.disable((rdev))
#define radeon_dpm_pre_set_power_state(rdev) (rdev)->asic->dpm.pre_set_power_state((rdev))
#define radeon_dpm_set_power_state(rdev) (rdev)->asic->dpm.set_power_state((rdev))
#define radeon_dpm_post_set_power_state(rdev) (rdev)->asic->dpm.post_set_power_state((rdev))
#define radeon_dpm_display_configuration_changed(rdev) (rdev)->asic->dpm.display_configuration_changed((rdev))
#define radeon_dpm_fini(rdev) (rdev)->asic->dpm.fini((rdev))
#define radeon_dpm_get_sclk(rdev, l) (rdev)->asic->dpm.get_sclk((rdev), (l))
#define radeon_dpm_get_mclk(rdev, l) (rdev)->asic->dpm.get_mclk((rdev), (l))
#define radeon_dpm_print_power_state(rdev, ps) (rdev)->asic->dpm.print_power_state((rdev), (ps))
#define radeon_dpm_debugfs_print_current_performance_level(rdev, m) (rdev)->asic->dpm.debugfs_print_current_performance_level((rdev), (m))
#define radeon_dpm_force_performance_level(rdev, l) (rdev)->asic->dpm.force_performance_level((rdev), (l))
#define radeon_dpm_vblank_too_short(rdev) (rdev)->asic->dpm.vblank_too_short((rdev))
#define radeon_dpm_powergate_uvd(rdev, g) (rdev)->asic->dpm.powergate_uvd((rdev), (g))
#define radeon_dpm_enable_bapm(rdev, e) (rdev)->asic->dpm.enable_bapm((rdev), (e))
#define radeon_dpm_get_current_sclk(rdev) (rdev)->asic->dpm.get_current_sclk((rdev))
#define radeon_dpm_get_current_mclk(rdev) (rdev)->asic->dpm.get_current_mclk((rdev))
|
2009-06-05 20:42:42 +08:00
|
|
|
|
2009-09-11 03:46:48 +08:00
|
|
|
/* Common functions */
|
2010-01-13 22:16:38 +08:00
|
|
|
/* AGP */
|
2010-03-09 22:45:12 +08:00
|
|
|
extern int radeon_gpu_reset(struct radeon_device *rdev);
|
2013-10-03 01:01:36 +08:00
|
|
|
extern void radeon_pci_config_reset(struct radeon_device *rdev);
|
2013-01-19 02:05:39 +08:00
|
|
|
extern void r600_set_bios_scratch_engine_hung(struct radeon_device *rdev, bool hung);
|
2010-01-13 22:16:38 +08:00
|
|
|
extern void radeon_agp_disable(struct radeon_device *rdev);
|
2009-09-11 21:55:33 +08:00
|
|
|
extern int radeon_modeset_init(struct radeon_device *rdev);
|
|
|
|
extern void radeon_modeset_fini(struct radeon_device *rdev);
|
2009-09-11 21:35:22 +08:00
|
|
|
extern bool radeon_card_posted(struct radeon_device *rdev);
|
2010-03-17 08:54:38 +08:00
|
|
|
extern void radeon_update_bandwidth_info(struct radeon_device *rdev);
|
2010-03-31 12:33:27 +08:00
|
|
|
extern void radeon_update_display_priority(struct radeon_device *rdev);
|
2009-12-01 12:06:31 +08:00
|
|
|
extern bool radeon_boot_test_post_card(struct radeon_device *rdev);
|
2009-09-11 21:55:33 +08:00
|
|
|
extern void radeon_scratch_init(struct radeon_device *rdev);
|
2010-08-28 06:25:25 +08:00
|
|
|
extern void radeon_wb_fini(struct radeon_device *rdev);
|
|
|
|
extern int radeon_wb_init(struct radeon_device *rdev);
|
|
|
|
extern void radeon_wb_disable(struct radeon_device *rdev);
|
2009-09-11 21:55:33 +08:00
|
|
|
extern void radeon_surface_init(struct radeon_device *rdev);
|
|
|
|
extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
|
2009-10-01 16:20:52 +08:00
|
|
|
extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
|
2009-09-29 00:34:43 +08:00
|
|
|
extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
|
2009-12-07 22:52:58 +08:00
|
|
|
extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
|
2009-12-15 04:02:09 +08:00
|
|
|
extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
|
drm/radeon: add userptr support v8
This patch adds an IOCTL for turning a pointer supplied by
userspace into a buffer object.
It imposes several restrictions upon the memory being mapped:
1. It must be page aligned (both start/end addresses, i.e ptr and size).
2. It must be normal system memory, not a pointer into another map of IO
space (e.g. it must not be a GTT mmapping of another object).
3. The BO is mapped into GTT, so the maximum amount of memory mapped at
all times is still the GTT limit.
4. The BO is only mapped readonly for now, so no write support.
5. List of backing pages is only acquired once, so they represent a
snapshot of the first use.
Exporting and sharing as well as mapping of buffer objects created by
this function is forbidden and results in an -EPERM.
v2: squash all previous changes into first public version
v3: fix tabs, map readonly, don't use MM callback any more
v4: set TTM_PAGE_FLAG_SG so that TTM never messes with the pages,
pin/unpin pages on bind/unbind instead of populate/unpopulate
v5: rebased on 3.17-wip, IOCTL renamed to userptr, reject any unknown
flags, better handle READONLY flag, improve permission check
v6: fix ptr cast warning, use set_page_dirty/mark_page_accessed on unpin
v7: add warning about its availability in the API definition
v8: drop access_ok check, fix VM mapping bits
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v4)
Reviewed-by: Jérôme Glisse <jglisse@redhat.com> (v4)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2014-08-07 15:36:00 +08:00
|
|
|
/* NOTE(review): presumably records a userptr address and flags on @ttm; confirm against the definition. */
extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
				     uint32_t flags);
|
|
|
|
extern bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm);
|
|
|
|
extern bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm);
|
drm/radeon/kms: simplify memory controller setup V2
Get rid of _location and use _start/_end also simplify the
computation of vram_start|end & gtt_start|end. For R1XX-R2XX
we place VRAM at the same address of PCI aperture, those GPU
shouldn't have much memory and seems to behave better when
setup that way. For R3XX and newer we place VRAM at 0. For
R6XX-R7XX AGP we place VRAM before or after the AGP aperture; this
might limit the VRAM size, but that is very unlikely.
For IGP we don't change the VRAM placement.
Tested on (compiz,quake3,suspend/resume):
PCI/PCIE:RV280,R420,RV515,RV570,RV610,RV710
AGP:RV100,RV280,R420,RV350,RV620(RPB*),RV730
IGP:RS480(RPB*),RS690,RS780(RPB*),RS880
RPB: resume previously broken
V2 correct commit message to reflect more accurately the bug
and move VRAM placement to 0 for most of the GPU to avoid
limiting VRAM.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-18 05:54:29 +08:00
|
|
|
extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base);
|
|
|
|
extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
|
2012-09-17 12:40:31 +08:00
|
|
|
extern int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon);
|
2016-03-18 23:58:39 +08:00
|
|
|
/* KMS suspend entry point; the counterpart prototype is radeon_resume_kms(). */
extern int radeon_suspend_kms(struct drm_device *dev, bool suspend,
			      bool fbcon, bool freeze);
|
2011-03-14 07:47:24 +08:00
|
|
|
extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size);
|
2013-02-27 00:26:51 +08:00
|
|
|
extern void radeon_program_register_sequence(struct radeon_device *rdev,
|
|
|
|
const u32 *registers,
|
|
|
|
const u32 array_size);
|
2009-09-11 03:46:48 +08:00
|
|
|
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
/*
|
|
|
|
* vm
|
|
|
|
*/
|
|
|
|
int radeon_vm_manager_init(struct radeon_device *rdev);
|
|
|
|
void radeon_vm_manager_fini(struct radeon_device *rdev);
|
2014-02-20 20:42:17 +08:00
|
|
|
int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
|
2014-11-27 21:48:42 +08:00
|
|
|
struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
|
2014-03-03 19:38:08 +08:00
|
|
|
struct radeon_vm *vm,
|
|
|
|
struct list_head *head);
|
2012-08-09 22:21:08 +08:00
|
|
|
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
|
|
|
|
struct radeon_vm *vm, int ring);
|
2014-02-20 17:47:05 +08:00
|
|
|
void radeon_vm_flush(struct radeon_device *rdev,
|
|
|
|
struct radeon_vm *vm,
|
2014-11-19 21:01:24 +08:00
|
|
|
int ring, struct radeon_fence *fence);
|
2012-08-09 22:21:08 +08:00
|
|
|
void radeon_vm_fence(struct radeon_device *rdev,
|
|
|
|
struct radeon_vm *vm,
|
|
|
|
struct radeon_fence *fence);
|
2012-09-18 01:36:18 +08:00
|
|
|
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr);
|
2014-02-20 20:42:17 +08:00
|
|
|
int radeon_vm_update_page_directory(struct radeon_device *rdev,
|
|
|
|
struct radeon_vm *vm);
|
2014-07-18 14:56:40 +08:00
|
|
|
int radeon_vm_clear_freed(struct radeon_device *rdev,
|
|
|
|
struct radeon_vm *vm);
|
2014-07-18 15:24:53 +08:00
|
|
|
int radeon_vm_clear_invalids(struct radeon_device *rdev,
|
|
|
|
struct radeon_vm *vm);
|
2013-11-25 22:42:11 +08:00
|
|
|
int radeon_vm_bo_update(struct radeon_device *rdev,
|
2014-07-18 14:56:40 +08:00
|
|
|
struct radeon_bo_va *bo_va,
|
2013-11-25 22:42:11 +08:00
|
|
|
struct ttm_mem_reg *mem);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
void radeon_vm_bo_invalidate(struct radeon_device *rdev,
|
|
|
|
struct radeon_bo *bo);
|
2012-09-11 22:10:00 +08:00
|
|
|
struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
|
|
|
|
struct radeon_bo *bo);
|
2012-09-11 22:10:04 +08:00
|
|
|
struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
|
|
|
|
struct radeon_vm *vm,
|
|
|
|
struct radeon_bo *bo);
|
|
|
|
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
|
|
|
|
struct radeon_bo_va *bo_va,
|
|
|
|
uint64_t offset,
|
|
|
|
uint32_t flags);
|
2014-07-18 14:56:40 +08:00
|
|
|
void radeon_vm_bo_rmv(struct radeon_device *rdev,
|
|
|
|
struct radeon_bo_va *bo_va);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
|
2012-03-30 20:59:57 +08:00
|
|
|
/* audio */
|
|
|
|
void r600_audio_update_hdmi(struct work_struct *work);
|
2013-08-01 04:51:33 +08:00
|
|
|
struct r600_audio_pin *r600_audio_get_pin(struct radeon_device *rdev);
|
|
|
|
struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev);
|
2014-02-19 00:07:55 +08:00
|
|
|
void r600_audio_enable(struct radeon_device *rdev,
|
|
|
|
struct r600_audio_pin *pin,
|
2014-09-19 05:26:39 +08:00
|
|
|
u8 enable_mask);
|
2014-02-19 00:07:55 +08:00
|
|
|
void dce6_audio_enable(struct radeon_device *rdev,
|
|
|
|
struct r600_audio_pin *pin,
|
2014-09-19 05:26:39 +08:00
|
|
|
u8 enable_mask);
|
drm/radeon: GPU virtual memory support v22
Virtual address space are per drm client (opener of /dev/drm).
Client are in charge of virtual address space, they need to
map bo into it by calling DRM_RADEON_GEM_VA ioctl.
First 16M of virtual address space is reserved by the kernel.
Once using 2 level page table we should be able to have a small
vram memory footprint for each pt (there would be one pt for all
gart, one for all vram and then one first level for each virtual
address space).
Plan include using the sub allocator for a common vm page table
area and using memcpy to copy vm page table in & out. Or use
a gart object and copy things in & out using dma.
v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a
vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete
cards and the physical bus address of the stolen memory on
integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1
v3: agd5f:
- integrate with the semaphore/multi-ring stuff
v4:
- rebase on top ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl, instead cs ioctl has a new
chunk
v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path
v6:
- fix update of page table to only happen on valid mem placement
v7:
- add tlb flush for each vm context
- add flags to define mapping property (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback
to then do ring specific scheduling if vm ib scheduling function
v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of page)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support
v9:
- rebuild page table only when bind and incrementally depending
on bo referenced by cs and that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non cayman GPU
- dump vm fault register on lockup
v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove
the callback and use the existing ib_execute callback for VM IBs.
v11:
- rebase on top of latest Linus
v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()
v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS
v14:
- fix va destruction
- fix suspend resume
- forbid bo to have several different va in same vm
v15:
- rebase
v16:
- cleanup left over of vm init/fini
v17: agd5f:
- cs checker
v18: agd5f:
- reworks the CS ioctl to better support multiple rings and
VM. Rather than adding a new chunk id for VM, just re-use the
IB chunk id and add a new flags for VM mode. Also define additional
dwords for the flags chunk id to define the what ring we want to use
(gfx, compute, uvd, etc.) and the priority.
v19:
- fix cs fini in weird case of no ib
- semi working flush fix for ni
- rebase on top of sa allocator changes
v20: agd5f:
- further CS ioctl cleanups from Christian's comments
v21: agd5f:
- integrate CS checker improvements
v22: agd5f:
- final cleanups for release, only allow VM CS on cayman
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2012-01-06 11:11:05 +08:00
|
|
|
|
2011-10-28 22:30:02 +08:00
|
|
|
/*
|
|
|
|
* R600 vram scratch functions
|
|
|
|
*/
|
|
|
|
int r600_vram_scratch_init(struct radeon_device *rdev);
|
|
|
|
void r600_vram_scratch_fini(struct radeon_device *rdev);
|
|
|
|
|
2011-12-17 06:03:42 +08:00
|
|
|
/*
|
|
|
|
* r600 cs checking helper
|
|
|
|
*/
|
|
|
|
unsigned r600_mip_minify(unsigned size, unsigned level);
|
|
|
|
bool r600_fmt_is_valid_color(u32 format);
|
|
|
|
bool r600_fmt_is_valid_texture(u32 format, enum radeon_family family);
|
|
|
|
int r600_fmt_get_blocksize(u32 format);
|
|
|
|
int r600_fmt_get_nblocksx(u32 format, u32 w);
|
|
|
|
int r600_fmt_get_nblocksy(u32 format, u32 h);
|
|
|
|
|
2011-02-19 00:59:19 +08:00
|
|
|
/*
|
|
|
|
* r600 functions used by radeon_encoder.c
|
|
|
|
*/
|
2012-04-30 21:44:54 +08:00
|
|
|
/*
 * Bundle of HDMI ACR values associated with one clock value;
 * r600_hdmi_acr() takes a clock and returns this struct by value.
 * NOTE(review): presumably ACR = Audio Clock Regeneration and each
 * n_ / cts_ pair holds the N and CTS values for the named audio sample
 * rate - confirm against the r600 HDMI implementation.
 */
struct radeon_hdmi_acr {
|
|
|
|
u32 clock; /* clock this entry describes; NOTE(review): units not visible here - confirm */
|
|
|
|
|
|
|
|
int n_32khz; /* presumably N for 32 kHz audio - confirm */
|
|
|
|
int cts_32khz; /* presumably CTS for 32 kHz audio - confirm */
|
|
|
|
|
|
|
|
int n_44_1khz; /* presumably N for 44.1 kHz audio - confirm */
|
|
|
|
int cts_44_1khz; /* presumably CTS for 44.1 kHz audio - confirm */
|
|
|
|
|
|
|
|
int n_48khz; /* presumably N for 48 kHz audio - confirm */
|
|
|
|
int cts_48khz; /* presumably CTS for 48 kHz audio - confirm */
|
|
|
|
|
|
|
|
};
|
|
|
|
|
2012-05-06 23:29:44 +08:00
|
|
|
extern struct radeon_hdmi_acr r600_hdmi_acr(uint32_t clock);
|
|
|
|
|
2012-06-01 07:00:25 +08:00
|
|
|
extern u32 r6xx_remap_render_backend(struct radeon_device *rdev,
|
|
|
|
u32 tiling_pipe_num,
|
|
|
|
u32 max_rb_num,
|
|
|
|
u32 total_max_rb_num,
|
|
|
|
u32 enabled_rb_mask);
|
2010-03-25 01:36:43 +08:00
|
|
|
|
2012-05-06 23:29:44 +08:00
|
|
|
/*
|
|
|
|
* evergreen functions used by radeon_encoder.c
|
|
|
|
*/
|
|
|
|
|
2011-01-07 10:19:31 +08:00
|
|
|
extern int ni_init_microcode(struct radeon_device *rdev);
|
2011-03-03 09:07:34 +08:00
|
|
|
extern int ni_mc_load_microcode(struct radeon_device *rdev);
|
2011-01-07 10:19:31 +08:00
|
|
|
|
2012-08-01 05:14:35 +08:00
|
|
|
/* radeon_acpi.c */
|
|
|
|
#if defined(CONFIG_ACPI)
|
|
|
|
extern int radeon_acpi_init(struct radeon_device *rdev);
|
|
|
|
extern void radeon_acpi_fini(struct radeon_device *rdev);
|
2013-06-26 12:33:35 +08:00
|
|
|
extern bool radeon_acpi_is_pcie_performance_request_supported(struct radeon_device *rdev);
|
|
|
|
extern int radeon_acpi_pcie_performance_request(struct radeon_device *rdev,
|
2013-02-14 04:47:24 +08:00
|
|
|
u8 perf_req, bool advertise);
|
2013-06-26 12:33:35 +08:00
|
|
|
extern int radeon_acpi_pcie_notify_device_ready(struct radeon_device *rdev);
|
2012-08-01 05:14:35 +08:00
|
|
|
#else
|
|
|
|
/*
 * Stub used when CONFIG_ACPI is not defined: there is nothing to set up,
 * so it ignores rdev and always reports success (0).
 */
static inline int radeon_acpi_init(struct radeon_device *rdev)
{
	return 0;
}
|
|
|
|
/*
 * Stub used when CONFIG_ACPI is not defined: there is nothing to tear
 * down, so it ignores rdev and does nothing.
 */
static inline void radeon_acpi_fini(struct radeon_device *rdev)
{
}
|
|
|
|
#endif
|
2010-07-06 23:40:24 +08:00
|
|
|
|
2013-01-03 07:27:41 +08:00
|
|
|
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
|
|
|
|
struct radeon_cs_packet *pkt,
|
|
|
|
unsigned idx);
|
2013-01-03 07:27:42 +08:00
|
|
|
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p);
|
2013-01-03 07:27:45 +08:00
|
|
|
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
|
|
|
|
struct radeon_cs_packet *pkt);
|
2013-01-03 07:27:46 +08:00
|
|
|
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
|
2014-11-27 21:48:42 +08:00
|
|
|
struct radeon_bo_list **cs_reloc,
|
2013-01-03 07:27:46 +08:00
|
|
|
int nomm);
|
2013-01-03 07:27:43 +08:00
|
|
|
int r600_cs_common_vline_parse(struct radeon_cs_parser *p,
|
|
|
|
uint32_t *vline_start_end,
|
|
|
|
uint32_t *vline_status);
|
2013-01-03 07:27:41 +08:00
|
|
|
|
2009-11-20 21:29:23 +08:00
|
|
|
#include "radeon_object.h"
|
|
|
|
|
2009-06-05 20:42:42 +08:00
|
|
|
#endif
|