radeon: Deinline indirect register accessor functions

This patch deinlines indirect register accessor functions.

These functions perform two mmio accesses, framed by spin lock/unlock.
Spin lock/unlock by itself takes more than 50 cycles in ideal case
(if lock is exclusively cached on current CPU).

With this .config: http://busybox.net/~vda/kernel_config,
after uninlining these functions have sizes and callsite counts
as follows:

r600_uvd_ctx_rreg: 111 bytes, 4 callsites
r600_uvd_ctx_wreg: 113 bytes, 5 callsites
eg_pif_phy0_rreg: 106 bytes, 13 callsites
eg_pif_phy0_wreg: 108 bytes, 13 callsites
eg_pif_phy1_rreg: 107 bytes, 13 callsites
eg_pif_phy1_wreg: 108 bytes, 13 callsites
rv370_pcie_rreg: 111 bytes, 21 callsites
rv370_pcie_wreg: 113 bytes, 24 callsites
r600_rcu_rreg: 111 bytes, 16 callsites
r600_rcu_wreg: 113 bytes, 25 callsites
cik_didt_rreg: 106 bytes, 10 callsites
cik_didt_wreg: 107 bytes, 10 callsites
tn_smc_rreg: 106 bytes, 126 callsites
tn_smc_wreg: 107 bytes, 116 callsites
eg_cg_rreg: 107 bytes, 20 callsites
eg_cg_wreg: 108 bytes, 52 callsites

Functions r100_mm_rreg() and r100_mm_rreg() have a fast path and
a locked (slow) path. This patch deinlines only slow path.

r100_mm_rreg_slow: 78 bytes, 2083 callsites
r100_mm_wreg_slow: 81 bytes, 3570 callsites

Reduction in code size is more than 65,000 bytes:

    text     data      bss       dec     hex filename
85740176 22294680 20627456 128662312 7ab3b28 vmlinux.before
85674192 22294776 20627456 128598664 7aa4288 vmlinux

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Denys Vlasenko 2015-05-20 13:02:37 +02:00 committed by Alex Deucher
parent b0b9bb4dd5
commit 9e5acbc213
7 changed files with 241 additions and 197 deletions

View File

@ -174,6 +174,31 @@ int cik_get_allowed_info_register(struct radeon_device *rdev,
} }
} }
/*
* Indirect registers accessor
*/
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->didt_idx_lock, flags);
WREG32(CIK_DIDT_IND_INDEX, (reg));
r = RREG32(CIK_DIDT_IND_DATA);
spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
return r;
}
void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->didt_idx_lock, flags);
WREG32(CIK_DIDT_IND_INDEX, (reg));
WREG32(CIK_DIDT_IND_DATA, (v));
spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
/* get temperature in millidegrees */ /* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev) int ci_get_temp(struct radeon_device *rdev)
{ {

View File

@ -35,6 +35,75 @@
#include "evergreen_blit_shaders.h" #include "evergreen_blit_shaders.h"
#include "radeon_ucode.h" #include "radeon_ucode.h"
/*
* Indirect registers accessor
*/
u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->cg_idx_lock, flags);
WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
r = RREG32(EVERGREEN_CG_IND_DATA);
spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
return r;
}
void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->cg_idx_lock, flags);
WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
WREG32(EVERGREEN_CG_IND_DATA, (v));
spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
}
u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->pif_idx_lock, flags);
WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
r = RREG32(EVERGREEN_PIF_PHY0_DATA);
spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
return r;
}
void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->pif_idx_lock, flags);
WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
WREG32(EVERGREEN_PIF_PHY0_DATA, (v));
spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
}
u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->pif_idx_lock, flags);
WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
r = RREG32(EVERGREEN_PIF_PHY1_DATA);
spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
return r;
}
void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->pif_idx_lock, flags);
WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
WREG32(EVERGREEN_PIF_PHY1_DATA, (v));
spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
}
static const u32 crtc_offsets[6] = static const u32 crtc_offsets[6] =
{ {
EVERGREEN_CRTC0_REGISTER_OFFSET, EVERGREEN_CRTC0_REGISTER_OFFSET,

View File

@ -36,6 +36,31 @@
#include "radeon_ucode.h" #include "radeon_ucode.h"
#include "clearstate_cayman.h" #include "clearstate_cayman.h"
/*
* Indirect registers accessor
*/
u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->smc_idx_lock, flags);
WREG32(TN_SMC_IND_INDEX_0, (reg));
r = RREG32(TN_SMC_IND_DATA_0);
spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
return r;
}
void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->smc_idx_lock, flags);
WREG32(TN_SMC_IND_INDEX_0, (reg));
WREG32(TN_SMC_IND_DATA_0, (v));
spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
}
static const u32 tn_rlc_save_restore_register_list[] = static const u32 tn_rlc_save_restore_register_list[] =
{ {
0x98fc, 0x98fc,

View File

@ -4090,6 +4090,28 @@ int r100_init(struct radeon_device *rdev)
return 0; return 0;
} }
uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg)
{
unsigned long flags;
uint32_t ret;
spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
return ret;
}
void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
}
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg) u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
{ {
if (reg < rdev->rio_mem_size) if (reg < rdev->rio_mem_size)

View File

@ -49,6 +49,31 @@
* tell. (Jerome Glisse) * tell. (Jerome Glisse)
*/ */
/*
* Indirect registers accessor
*/
uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
{
unsigned long flags;
uint32_t r;
spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
r = RREG32(RADEON_PCIE_DATA);
spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
return r;
}
void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
WREG32(RADEON_PCIE_DATA, (v));
spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
}
/* /*
* rv370,rv380 PCIE GART * rv370,rv380 PCIE GART
*/ */

View File

@ -108,6 +108,53 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev);
extern int evergreen_rlc_resume(struct radeon_device *rdev); extern int evergreen_rlc_resume(struct radeon_device *rdev);
extern void rv770_set_clk_bypass_mode(struct radeon_device *rdev); extern void rv770_set_clk_bypass_mode(struct radeon_device *rdev);
/*
* Indirect registers accessor
*/
u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
r = RREG32(R600_RCU_DATA);
spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
return r;
}
void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
WREG32(R600_RCU_DATA, (v));
spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
}
u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
r = RREG32(R600_UVD_CTX_DATA);
spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
return r;
}
void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
WREG32(R600_UVD_CTX_DATA, (v));
spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
}
/** /**
* r600_get_allowed_info_register - fetch the register for the info ioctl * r600_get_allowed_info_register - fetch the register for the info ioctl
* *

View File

@ -2474,38 +2474,24 @@ int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
#define RADEON_MIN_MMIO_SIZE 0x10000 #define RADEON_MIN_MMIO_SIZE 0x10000
uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg);
void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v);
static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
bool always_indirect) bool always_indirect)
{ {
/* The mmio size is 64kb at minimum. Allows the if to be optimized out. */ /* The mmio size is 64kb at minimum. Allows the if to be optimized out. */
if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect) if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
return readl(((void __iomem *)rdev->rmmio) + reg); return readl(((void __iomem *)rdev->rmmio) + reg);
else { else
unsigned long flags; return r100_mm_rreg_slow(rdev, reg);
uint32_t ret;
spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
return ret;
}
} }
static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
bool always_indirect) bool always_indirect)
{ {
if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect) if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
writel(v, ((void __iomem *)rdev->rmmio) + reg); writel(v, ((void __iomem *)rdev->rmmio) + reg);
else { else
unsigned long flags; r100_mm_wreg_slow(rdev, reg, v);
spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
}
} }
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
@ -2596,184 +2582,29 @@ static inline struct radeon_fence *to_radeon_fence(struct fence *f)
#define WDOORBELL32(index, v) cik_mm_wdoorbell(rdev, (index), (v)) #define WDOORBELL32(index, v) cik_mm_wdoorbell(rdev, (index), (v))
/* /*
* Indirect registers accessor * Indirect registers accessors.
* They used to be inlined, but this increases code size by ~65 kbytes.
* Since each performs a pair of MMIO ops
* within a spin_lock_irqsave/spin_unlock_irqrestore region,
* the cost of call+ret is almost negligible. MMIO and locking
* costs several dozens of cycles each at best, call+ret is ~5 cycles.
*/ */
static inline uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg) uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg);
{ void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
unsigned long flags; u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg);
uint32_t r; void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v);
u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg);
spin_lock_irqsave(&rdev->pcie_idx_lock, flags); void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v);
WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask)); u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg);
r = RREG32(RADEON_PCIE_DATA); void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v);
spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags); u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg);
return r; void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v);
} u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg);
void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v);
static inline void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg);
{ void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v);
unsigned long flags; u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg);
void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v);
spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
WREG32(RADEON_PCIE_DATA, (v));
spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
}
static inline u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->smc_idx_lock, flags);
WREG32(TN_SMC_IND_INDEX_0, (reg));
r = RREG32(TN_SMC_IND_DATA_0);
spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
return r;
}
static inline void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->smc_idx_lock, flags);
WREG32(TN_SMC_IND_INDEX_0, (reg));
WREG32(TN_SMC_IND_DATA_0, (v));
spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
}
static inline u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
r = RREG32(R600_RCU_DATA);
spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
return r;
}
static inline void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
WREG32(R600_RCU_DATA, (v));
spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
}
static inline u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->cg_idx_lock, flags);
WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
r = RREG32(EVERGREEN_CG_IND_DATA);
spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
return r;
}
static inline void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->cg_idx_lock, flags);
WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
WREG32(EVERGREEN_CG_IND_DATA, (v));
spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
}
static inline u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->pif_idx_lock, flags);
WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
r = RREG32(EVERGREEN_PIF_PHY0_DATA);
spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
return r;
}
static inline void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->pif_idx_lock, flags);
WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
WREG32(EVERGREEN_PIF_PHY0_DATA, (v));
spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
}
static inline u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->pif_idx_lock, flags);
WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
r = RREG32(EVERGREEN_PIF_PHY1_DATA);
spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
return r;
}
static inline void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->pif_idx_lock, flags);
WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
WREG32(EVERGREEN_PIF_PHY1_DATA, (v));
spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
}
static inline u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
r = RREG32(R600_UVD_CTX_DATA);
spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
return r;
}
static inline void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
WREG32(R600_UVD_CTX_DATA, (v));
spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
}
static inline u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&rdev->didt_idx_lock, flags);
WREG32(CIK_DIDT_IND_INDEX, (reg));
r = RREG32(CIK_DIDT_IND_DATA);
spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
return r;
}
static inline void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&rdev->didt_idx_lock, flags);
WREG32(CIK_DIDT_IND_INDEX, (reg));
WREG32(CIK_DIDT_IND_DATA, (v));
spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
void r100_pll_errata_after_index(struct radeon_device *rdev); void r100_pll_errata_after_index(struct radeon_device *rdev);