forked from OSchip/llvm-project
[AMDGPU] Optimize image_[load|store]_mip
Summary: Replace image_load_mip/image_store_mip with image_load/image_store if lod is 0. Reviewers: arsenm, nhaehnle Reviewed By: arsenm Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63073 llvm-svn: 362957
This commit is contained in:
parent
67065c5c70
commit
9b11e93d90
|
@ -101,6 +101,22 @@ def MIMGLZMappingTable : GenericTable {
|
|||
let PrimaryKeyName = "getMIMGLZMappingInfo";
|
||||
}
|
||||
|
||||
// Pairs a MIMG base opcode that takes a mip level (MIP) with the base opcode
// to use in its place when no mip level is needed (NONMIP). Records of this
// class are the rows collected by MIMGMIPMappingTable (its FilterClass).
class MIMGMIPMapping<MIMGBaseOpcode mip, MIMGBaseOpcode nonmip> {
|
||||
MIMGBaseOpcode MIP = mip;
|
||||
MIMGBaseOpcode NONMIP = nonmip;
|
||||
}
|
||||
|
||||
// Searchable table built from every MIMGMIPMapping record. Rows are exposed
// to C++ as MIMGMIPMappingInfo and are looked up by their MIP field through
// the generated function getMIMGMIPMappingInfo.
def MIMGMIPMappingTable : GenericTable {
|
||||
let FilterClass = "MIMGMIPMapping";
|
||||
let CppTypeName = "MIMGMIPMappingInfo";
|
||||
let Fields = ["MIP", "NONMIP"];
|
||||
GenericEnum TypeOf_MIP = MIMGBaseOpcode;
|
||||
GenericEnum TypeOf_NONMIP = MIMGBaseOpcode;
|
||||
|
||||
// The mip-taking opcode is the lookup key.
let PrimaryKey = ["MIP"];
|
||||
let PrimaryKeyName = "getMIMGMIPMappingInfo";
|
||||
}
|
||||
|
||||
class MIMG <dag outs, string dns = "">
|
||||
: InstSI <outs, (ins), "", []> {
|
||||
|
||||
|
@ -808,3 +824,7 @@ def : MIMGLZMapping<IMAGE_GATHER4_L, IMAGE_GATHER4_LZ>;
|
|||
def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;
|
||||
def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;
|
||||
def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
|
||||
|
||||
// MIP to NONMIP Optimization Mapping
|
||||
def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
|
||||
def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
|
||||
|
|
|
@ -4863,6 +4863,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
|||
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
|
||||
const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
|
||||
AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
|
||||
const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
|
||||
AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
|
||||
unsigned IntrOpcode = Intr->BaseOpcode;
|
||||
bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;
|
||||
|
||||
|
@ -4966,6 +4968,17 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
|||
}
|
||||
}
|
||||
|
||||
// Optimize _mip away, when 'lod' is zero
|
||||
if (MIPMappingInfo) {
|
||||
if (auto ConstantLod =
|
||||
dyn_cast<ConstantSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
|
||||
if (ConstantLod->isNullValue()) {
|
||||
IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip
|
||||
NumMIVAddrs--; // remove 'lod'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for 16 bit addresses and pack if true.
|
||||
unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
|
||||
MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
|
||||
|
|
|
@ -104,6 +104,7 @@ namespace AMDGPU {
|
|||
#define GET_MIMGDimInfoTable_IMPL
|
||||
#define GET_MIMGInfoTable_IMPL
|
||||
#define GET_MIMGLZMappingTable_IMPL
|
||||
#define GET_MIMGMIPMappingTable_IMPL
|
||||
#include "AMDGPUGenSearchableTables.inc"
|
||||
|
||||
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
|
||||
|
|
|
@ -45,6 +45,7 @@ namespace AMDGPU {
|
|||
#define GET_MIMGDim_DECL
|
||||
#define GET_MIMGEncoding_DECL
|
||||
#define GET_MIMGLZMapping_DECL
|
||||
#define GET_MIMGMIPMapping_DECL
|
||||
#include "AMDGPUGenSearchableTables.inc"
|
||||
|
||||
namespace IsaInfo {
|
||||
|
@ -218,9 +219,17 @@ struct MIMGLZMappingInfo {
|
|||
MIMGBaseOpcode LZ;
|
||||
};
|
||||
|
||||
// One row of the MIP -> NONMIP mapping table: MIP is the base opcode that
// takes a mip level, NONMIP is the base opcode that replaces it when the
// mip level operand is dropped.
struct MIMGMIPMappingInfo {
|
||||
MIMGBaseOpcode MIP;
|
||||
MIMGBaseOpcode NONMIP;
|
||||
};
|
||||
|
||||
LLVM_READONLY
|
||||
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
|
||||
|
||||
LLVM_READONLY
|
||||
// Looks up the mapping row keyed by the mip-taking base opcode \p MIP.
// The caller in lowerImage null-checks the result before using it.
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
|
||||
|
||||
LLVM_READONLY
|
||||
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
|
||||
unsigned VDataDwords, unsigned VAddrDwords);
|
||||
|
|
|
@ -0,0 +1,132 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}load_mip_1d:
|
||||
; GCN-NOT: image_load_mip
|
||||
; Require the register operand after the mnemonic: a bare "image_load" pattern also matches the start of "image_load_mip".
; GCN: image_load v
|
||||
define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}load_mip_2d:
|
||||
; GCN-NOT: image_load_mip
|
||||
; Require the register operand after the mnemonic: a bare "image_load" pattern also matches the start of "image_load_mip".
; GCN: image_load v
|
||||
define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}load_mip_3d:
|
||||
; GCN-NOT: image_load_mip
|
||||
; Require the register operand after the mnemonic: a bare "image_load" pattern also matches the start of "image_load_mip".
; GCN: image_load v
|
||||
define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}load_mip_1darray:
|
||||
; GCN-NOT: image_load_mip
|
||||
; Require the register operand after the mnemonic: a bare "image_load" pattern also matches the start of "image_load_mip".
; GCN: image_load v
|
||||
define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}load_mip_2darray:
|
||||
; GCN-NOT: image_load_mip
|
||||
; Require the register operand after the mnemonic: a bare "image_load" pattern also matches the start of "image_load_mip".
; GCN: image_load v
|
||||
define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}load_mip_cube:
|
||||
; GCN-NOT: image_load_mip
|
||||
; Require the register operand after the mnemonic: a bare "image_load" pattern also matches the start of "image_load_mip".
; GCN: image_load v
|
||||
define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
|
||||
main_body:
|
||||
%v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}store_mip_1d:
|
||||
; GCN-NOT: image_store_mip
|
||||
; Require the register operand after the mnemonic: a bare "image_store" pattern also matches the start of "image_store_mip".
; GCN: image_store v
|
||||
define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_mip_2d:
|
||||
; GCN-NOT: image_store_mip
|
||||
; Require the register operand after the mnemonic: a bare "image_store" pattern also matches the start of "image_store_mip".
; GCN: image_store v
|
||||
define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_mip_3d:
|
||||
; GCN-NOT: image_store_mip
|
||||
; Require the register operand after the mnemonic: a bare "image_store" pattern also matches the start of "image_store_mip".
; GCN: image_store v
|
||||
define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_mip_1darray:
|
||||
; GCN-NOT: image_store_mip
|
||||
; Require the register operand after the mnemonic: a bare "image_store" pattern also matches the start of "image_store_mip".
; GCN: image_store v
|
||||
define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_mip_2darray:
|
||||
; GCN-NOT: image_store_mip
|
||||
; Require the register operand after the mnemonic: a bare "image_store" pattern also matches the start of "image_store_mip".
; GCN: image_store v
|
||||
define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_mip_cube:
|
||||
; GCN-NOT: image_store_mip
|
||||
; Require the register operand after the mnemonic: a bare "image_store" pattern also matches the start of "image_store_mip".
; GCN: image_store v
|
||||
define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
|
||||
|
||||
|
||||
declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
|
||||
declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
Loading…
Reference in New Issue