[AMDGPU] Optimize image_[load|store]_mip

Summary: Replace image_load_mip/image_store_mip with image_load/image_store if lod is 0. Reviewers: arsenm, nhaehnle Reviewed By: arsenm Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63073 llvm-svn: 362957
2019-06-10 15:58:51 +00:00 · 2019-06-10 15:58:51 +00:00 · 9b11e93d90
parent 67065c5c70
commit 9b11e93d90
5 changed files with 175 additions and 0 deletions
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@ -101,6 +101,22 @@ def MIMGLZMappingTable : GenericTable {
  let PrimaryKeyName = "getMIMGLZMappingInfo";
 }

+class MIMGMIPMapping<MIMGBaseOpcode mip, MIMGBaseOpcode nonmip> {
+  MIMGBaseOpcode MIP = mip;
+  MIMGBaseOpcode NONMIP = nonmip;
+}
+
+def MIMGMIPMappingTable : GenericTable {
+  let FilterClass = "MIMGMIPMapping";
+  let CppTypeName = "MIMGMIPMappingInfo";
+  let Fields = ["MIP", "NONMIP"];
+  GenericEnum TypeOf_MIP = MIMGBaseOpcode;
+  GenericEnum TypeOf_NONMIP = MIMGBaseOpcode;
+
+  let PrimaryKey = ["MIP"];
+  let PrimaryKeyName = "getMIMGMIPMappingInfo";
+}
+
 class MIMG <dag outs, string dns = "">
  : InstSI <outs, (ins), "", []> {

@ -808,3 +824,7 @@ def : MIMGLZMapping<IMAGE_GATHER4_L, IMAGE_GATHER4_LZ>;
 def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;
 def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;
 def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
+
+// MIP to NONMIP Optimization Mapping
+def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
+def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@ -4863,6 +4863,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
  const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
      AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
+  const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
+      AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
  unsigned IntrOpcode = Intr->BaseOpcode;
  bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;

@ -4966,6 +4968,17 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
    }
  }

+  // Optimize _mip away, when 'lod' is zero
+  if (MIPMappingInfo) {
+    if (auto ConstantLod =
+         dyn_cast<ConstantSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
+      if (ConstantLod->isNullValue()) {
+        IntrOpcode = MIPMappingInfo->NONMIP;  // set new opcode to variant without _mip
+        NumMIVAddrs--;               // remove 'lod'
+      }
+    }
+  }
+
  // Check for 16 bit addresses and pack if true.
  unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
  MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@ -104,6 +104,7 @@ namespace AMDGPU {
 #define GET_MIMGDimInfoTable_IMPL
 #define GET_MIMGInfoTable_IMPL
 #define GET_MIMGLZMappingTable_IMPL
+#define GET_MIMGMIPMappingTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"

 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@ -45,6 +45,7 @@ namespace AMDGPU {
 #define GET_MIMGDim_DECL
 #define GET_MIMGEncoding_DECL
 #define GET_MIMGLZMapping_DECL
+#define GET_MIMGMIPMapping_DECL
 #include "AMDGPUGenSearchableTables.inc"

 namespace IsaInfo {
@ -218,9 +219,17 @@ struct MIMGLZMappingInfo {
  MIMGBaseOpcode LZ;
 };

+struct MIMGMIPMappingInfo {
+  MIMGBaseOpcode MIP;
+  MIMGBaseOpcode NONMIP;
+};
+
 LLVM_READONLY
 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

+LLVM_READONLY
+const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned L);
+
 LLVM_READONLY
 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);
--- a/llvm/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll
+++ b/llvm/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll
@ -0,0 +1,132 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+
+; GCN-LABEL: {{^}}load_mip_1d:
+; GCN-NOT: image_load_mip
+; GCN: image_load
+define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_2d:
+; GCN-NOT: image_load_mip
+; GCN: image_load
+define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_3d:
+; GCN-NOT: image_load_mip
+; GCN: image_load
+define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_1darray:
+; GCN-NOT: image_load_mip
+; GCN: image_load
+define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_2darray:
+; GCN-NOT: image_load_mip
+; GCN: image_load
+define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_cube:
+; GCN-NOT: image_load_mip
+; GCN: image_load
+define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+
+
+; GCN-LABEL: {{^}}store_mip_1d:
+; GCN-NOT: image_store_mip
+; GCN: image_store
+define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_2d:
+; GCN-NOT: image_store_mip
+; GCN: image_store
+define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_3d:
+; GCN-NOT: image_store_mip
+; GCN: image_store
+define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_1darray:
+; GCN-NOT: image_store_mip
+; GCN: image_store
+define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_2darray:
+; GCN-NOT: image_store_mip
+; GCN: image_store
+define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_cube:
+; GCN-NOT: image_store_mip
+; GCN: image_store
+define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+
+declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+