drm/radeon: Inline r100_mm_rreg, -wreg, v3
This was originally un-inlined by Andi Kleen in 2011 citing size concerns. Indeed, a first attempt at inlining it grew radeon.ko by 7%. However, 2% of cpu is spent in this function. Simply inlining it gave 1% more fps in Urban Terror. v2: We know the minimum MMIO size. Adding it to the if allows the compiler to optimize the branch out, improving both performance and size. The v2 patch decreases radeon.ko size by 2%. I didn't re-benchmark, but common sense says perf is now more than 1% better. v3: Also change _wreg, make the threshold a define. Inlining _wreg increased the size a bit compared to v2, so now radeon.ko is only 1% smaller. Signed-off-by: Lauri Kasanen <cand@gmx.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
3e22920fbd
commit
59bc1d89d6
|
@ -4065,39 +4065,6 @@ int r100_init(struct radeon_device *rdev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
|
||||
bool always_indirect)
|
||||
{
|
||||
if (reg < rdev->rmmio_size && !always_indirect)
|
||||
return readl(((void __iomem *)rdev->rmmio) + reg);
|
||||
else {
|
||||
unsigned long flags;
|
||||
uint32_t ret;
|
||||
|
||||
spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
|
||||
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
|
||||
ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
|
||||
spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
|
||||
bool always_indirect)
|
||||
{
|
||||
if (reg < rdev->rmmio_size && !always_indirect)
|
||||
writel(v, ((void __iomem *)rdev->rmmio) + reg);
|
||||
else {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
|
||||
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
|
||||
writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
|
||||
spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
|
||||
{
|
||||
if (reg < rdev->rio_mem_size)
|
||||
|
|
|
@ -2357,10 +2357,42 @@ int radeon_device_init(struct radeon_device *rdev,
|
|||
void radeon_device_fini(struct radeon_device *rdev);
|
||||
int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
|
||||
|
||||
uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
|
||||
bool always_indirect);
|
||||
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
|
||||
bool always_indirect);
|
||||
#define RADEON_MIN_MMIO_SIZE 0x10000
|
||||
|
||||
static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
|
||||
bool always_indirect)
|
||||
{
|
||||
/* The mmio size is 64kb at minimum. Allows the if to be optimized out. */
|
||||
if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
|
||||
return readl(((void __iomem *)rdev->rmmio) + reg);
|
||||
else {
|
||||
unsigned long flags;
|
||||
uint32_t ret;
|
||||
|
||||
spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
|
||||
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
|
||||
ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
|
||||
spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
|
||||
bool always_indirect)
|
||||
{
|
||||
if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
|
||||
writel(v, ((void __iomem *)rdev->rmmio) + reg);
|
||||
else {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
|
||||
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
|
||||
writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
|
||||
spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
|
||||
void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
|
||||
|
||||
|
|
Loading…
Reference in New Issue