[AMDGPU] Restrict image_msaa_load to MSAA dimension types

This instruction is only valid on 2D MSAA and 2D MSAA Array surfaces. Remove intrinsic support for other dimension types, and block assembly for unsupported dimensions. Reviewed By: foad Differential Revision: https://reviews.llvm.org/D98397
2021-03-12 09:47:08 +09:00 · 2021-03-12 09:47:08 +09:00 · c07f2025e4
parent e1364f1068
commit c07f2025e4
7 changed files with 102 additions and 174 deletions
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@ -508,12 +508,14 @@ class arglistconcat<list<list<AMDGPUArg>> arglists, int shift = 0> {

 // Represent texture/image types / dimensionality.
 class AMDGPUDimProps<bits<3> enc, string name, string asmsuffix,
-                     list<string> coord_names, list<string> slice_names> {
+                     list<string> coord_names, list<string> slice_names,
+                     bit msaa = 0> {
  AMDGPUDimProps Dim = !cast<AMDGPUDimProps>(NAME);
  string Name = name; // e.g. "2darraymsaa"
  string AsmSuffix = asmsuffix; // e.g. 2D_MSAA_ARRAY (used in assembly strings)
  bits<3> Encoding = enc;
  bit DA = 0; // DA bit in MIMG encoding
+  bit MSAA = msaa;

  list<AMDGPUArg> CoordSliceArgs =
    makeArgList<!listconcat(coord_names, slice_names), llvm_anyfloat_ty>.ret;
@ -536,9 +538,9 @@ let DA = 1 in {
  def AMDGPUDim1DArray : AMDGPUDimProps<0x4, "1darray", "1D_ARRAY", ["s"], ["slice"]>;
  def AMDGPUDim2DArray : AMDGPUDimProps<0x5, "2darray", "2D_ARRAY", ["s", "t"], ["slice"]>;
 }
-def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"]>;
+def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"], 1>;
 let DA = 1 in {
-  def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"]>;
+  def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"], 1>;
 }

 def AMDGPUDims {
@ -798,10 +800,15 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
              "STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
              [IntrWriteMem, IntrWillReturn], [SDNPMemOperand], 1>;

-  defm int_amdgcn_image_msaa_load_x
-    : AMDGPUImageDimIntrinsicsAll<"MSAA_LOAD_X", [llvm_any_ty], [], [IntrReadMem],
-                                  [SDNPMemOperand]>,
-      AMDGPUImageDMaskIntrinsic;
+  //////////////////////////////////////////////////////////////////////////
+  // MSAA intrinsics
+  //////////////////////////////////////////////////////////////////////////
+  foreach dim = AMDGPUDims.Msaa in {
+    def int_amdgcn_image_msaa_load_x # _ # dim.Name:
+        AMDGPUImageDimIntrinsic<
+            AMDGPUDimNoSampleProfile<"MSAA_LOAD_X", dim, [llvm_any_ty], []>,
+            [IntrReadMem], [SDNPMemOperand]>;
+  }

  //////////////////////////////////////////////////////////////////////////
  // sample and getlod intrinsics
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@ -1545,6 +1545,7 @@ private:
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
+  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
@ -3492,6 +3493,29 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
 }

+bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+    return true;
+
+  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
+  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+
+  if (!BaseOpcode->MSAA)
+    return true;
+
+  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+  assert(DimIdx != -1);
+
+  unsigned Dim = Inst.getOperand(DimIdx).getImm();
+  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
+
+  return DimInfo->MSAA;
+}
+
 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
 {
  switch (Opcode) {
@ -4128,6 +4152,11 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
    Error(IDLoc, "dim modifier is required on this GPU");
    return false;
  }
+  if (!validateMIMGMSAA(Inst)) {
+    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
+          "invalid dim; must be MSAA type");
+    return false;
+  }
  if (!validateMIMGDataSize(Inst)) {
    Error(IDLoc,
      "image data size does not match dmask and tfe");
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@ -42,6 +42,7 @@ class MIMGBaseOpcode : PredicateControl {
  bit LodOrClampOrMip = 0;
  bit HasD16 = 0;
  bit IsAtomicRet = 0;
+  bit MSAA = 0;
 }

 def MIMGBaseOpcode : GenericEnum {
@ -53,7 +54,7 @@ def MIMGBaseOpcodesTable : GenericTable {
  let CppTypeName = "MIMGBaseOpcodeInfo";
  let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
                "Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates",
-                "LodOrClampOrMip", "HasD16"];
+                "LodOrClampOrMip", "HasD16", "MSAA"];
  string TypeOf_BaseOpcode = "MIMGBaseOpcode";

  let PrimaryKey = ["BaseOpcode"];
@ -67,7 +68,7 @@ def MIMGDim : GenericEnum {
 def MIMGDimInfoTable : GenericTable {
  let FilterClass = "AMDGPUDimProps";
  let CppTypeName = "MIMGDimInfo";
-  let Fields = ["Dim", "NumCoords", "NumGradients", "DA", "Encoding", "AsmSuffix"];
+  let Fields = ["Dim", "NumCoords", "NumGradients", "MSAA", "DA", "Encoding", "AsmSuffix"];
  string TypeOf_Dim = "MIMGDim";

  let PrimaryKey = ["Dim"];
@ -364,6 +365,7 @@ multiclass MIMG_NoSampler <mimgopc op, string asm, bit has_d16, bit mip = 0,
    let Coordinates = !not(isResInfo);
    let LodOrClampOrMip = mip;
    let HasD16 = has_d16;
+    let MSAA = msaa;
  }

  let BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@ -284,6 +284,7 @@ struct MIMGBaseOpcodeInfo {
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
+  bool MSAA;
 };

 LLVM_READONLY
@ -293,6 +294,7 @@ struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
+  bool MSAA;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll
@ -1,111 +1,5 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s

-; GCN-LABEL: {{^}}load_1d:
-; GFX10: image_msaa_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
-define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_1d_tfe:
-; GFX10: image_msaa_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe ;
-define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
-main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
-  %v.vec = extractvalue {<4 x float>, i32} %v, 0
-  %v.err = extractvalue {<4 x float>, i32} %v, 1
-  store i32 %v.err, i32 addrspace(1)* %out, align 4
-  ret <4 x float> %v.vec
-}
-
-; GCN-LABEL: {{^}}load_1d_lwe:
-; GFX10: image_msaa_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ;
-define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
-main_body:
-  %v = call {<4 x float>, i32} @llvm.amdgcn.image.msaa.load.x.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
-  %v.vec = extractvalue {<4 x float>, i32} %v, 0
-  %v.err = extractvalue {<4 x float>, i32} %v, 1
-  store i32 %v.err, i32 addrspace(1)* %out, align 4
-  ret <4 x float> %v.vec
-}
-
-; GCN-LABEL: {{^}}load_2d:
-; GFX10: image_msaa_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
-define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_2d_tfe:
-; GFX10: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ;
-define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) {
-main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
-  %v.vec = extractvalue {<4 x float>, i32} %v, 0
-  %v.err = extractvalue {<4 x float>, i32} %v, 1
-  store i32 %v.err, i32 addrspace(1)* %out, align 4
-  ret <4 x float> %v.vec
-}
-
-; GCN-LABEL: {{^}}load_3d:
-; GFX10: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
-define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_3d_tfe_lwe:
-; GFX10: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ;
-define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) {
-main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.3d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
-  %v.vec = extractvalue {<4 x float>, i32} %v, 0
-  %v.err = extractvalue {<4 x float>, i32} %v, 1
-  store i32 %v.err, i32 addrspace(1)* %out, align 4
-  ret <4 x float> %v.vec
-}
-
-; GCN-LABEL: {{^}}load_1darray:
-; GFX10: image_msaa_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
-define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_1darray_tfe:
-; GFX10: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe ;
-define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %slice) {
-main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1darray.v4f32i32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0)
-  %v.vec = extractvalue {<4 x float>, i32} %v, 0
-  %v.err = extractvalue {<4 x float>, i32} %v, 1
-  store i32 %v.err, i32 addrspace(1)* %out, align 4
-  ret <4 x float> %v.vec
-}
-
-; GCN-LABEL: {{^}}load_2darray:
-; GFX10: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
-define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_2darray_lwe:
-; GFX10: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe ;
-define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
-main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
-  %v.vec = extractvalue {<4 x float>, i32} %v, 0
-  %v.err = extractvalue {<4 x float>, i32} %v, 1
-  store i32 %v.err, i32 addrspace(1)* %out, align 4
-  ret <4 x float> %v.vec
-}
-
 ; GCN-LABEL: {{^}}load_2dmsaa:
 ; GFX10: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ;
 define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
@ -144,110 +38,98 @@ main_body:
  ret <4 x float> %v.vec
 }

-; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask3:
-; GFX10: image_msaa_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe ;
-define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_tfe_V4_dmask3:
+; GFX10: image_msaa_load v[0:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe ;
+define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v4f32i32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 7, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
 }

-; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask2:
-; GFX10: image_msaa_load v[0:2], v{{[0-9]+}}, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe ;
-define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_tfe_V4_dmask2:
+; GFX10: image_msaa_load v[0:2], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe ;
+define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v4f32i32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 6, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
 }

-; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask1:
-; GFX10: image_msaa_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ;
-define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_tfe_V4_dmask1:
+; GFX10: image_msaa_load v[0:1], [v4, v3, v2], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe ;
+define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v4f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 8, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
 }

-; GCN-LABEL: {{^}}load_1d_tfe_V2_dmask1:
-; GFX10: image_msaa_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ;
-define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_tfe_V2_dmask1:
+; GFX10: image_msaa_load v[0:1], [v4, v3, v2], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe ;
+define amdgpu_ps <2 x float> @load_2dmsaa_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v2f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+  %v = call {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32i32.i32(i32 8, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<2 x float>, i32} %v, 0
  %v.err = extractvalue {<2 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <2 x float> %v.vec
 }

-; GCN-LABEL: {{^}}load_1d_V1:
-; GFX10: image_msaa_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm ;
-define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_V1:
+; GFX10: image_msaa_load v0, v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA unorm ;
+define amdgpu_ps float @load_2dmsaa_V1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call float @llvm.amdgcn.image.msaa.load.x.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %v = call float @llvm.amdgcn.image.msaa.load.x.2dmsaa.f32.i32(i32 8, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
 }

-; GCN-LABEL: {{^}}load_1d_V2:
-; GFX10: image_msaa_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm ;
-define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_V2:
+; GFX10: image_msaa_load v[0:1], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_2D_MSAA unorm ;
+define amdgpu_ps <2 x float> @load_2dmsaa_V2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call <2 x float> @llvm.amdgcn.image.msaa.load.x.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %v = call <2 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32.i32(i32 9, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
 }

-; GCN-LABEL: {{^}}load_1d_glc:
-; GFX10: image_msaa_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ;
-define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_glc:
+; GFX10: image_msaa_load v[0:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm glc ;
+define amdgpu_ps <4 x float> @load_2dmsaa_glc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
+  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 1)
  ret <4 x float> %v
 }

-; GCN-LABEL: {{^}}load_1d_slc:
-; GFX10: image_msaa_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ;
-define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_slc:
+; GFX10: image_msaa_load v[0:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm slc ;
+define amdgpu_ps <4 x float> @load_2dmsaa_slc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
+  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 2)
  ret <4 x float> %v
 }

-; GCN-LABEL: {{^}}load_1d_glc_slc:
-; GFX10: image_msaa_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ;
-define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: {{^}}load_2dmsaa_glc_slc:
+; GFX10: image_msaa_load v[0:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc ;
+define amdgpu_ps <4 x float> @load_2dmsaa_glc_slc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
 main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
+  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 3)
  ret <4 x float> %v
 }

-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {float,i32} @llvm.amdgcn.image.msaa.load.x.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1

-declare float @llvm.amdgcn.image.msaa.load.x.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare float @llvm.amdgcn.image.msaa.load.x.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.msaa.load.x.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare float @llvm.amdgcn.image.msaa.load.x.2dmsaa.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1

 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
--- a/llvm/test/MC/AMDGPU/gfx1030_err.s
+++ b/llvm/test/MC/AMDGPU/gfx1030_err.s
@ -143,3 +143,9 @@ buffer_atomic_csub v5, off, s[8:11], s3 offset:4095

 global_atomic_csub v2, v[0:1], v2, off offset:100 slc
 // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc
+
+image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid dim; must be MSAA type
+
+image_msaa_load v5, v[1:2], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D d16
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid dim; must be MSAA type
--- a/llvm/test/MC/AMDGPU/gfx1030_new.s
+++ b/llvm/test/MC/AMDGPU/gfx1030_new.s
@ -105,17 +105,17 @@ image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19
 image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19], s[12:15] a16
 // GFX10: encoding: [0x05,0x9f,0x9c,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13]

-image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
-// GFX10: encoding: [0x01,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00]
+image_msaa_load v[1:4], v[5:7], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA
+// GFX10: encoding: [0x31,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00]

-image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D glc
-// GFX10: encoding: [0x01,0x2f,0x00,0xf0,0x05,0x01,0x02,0x00]
+image_msaa_load v[1:4], v[5:7], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA glc
+// GFX10: encoding: [0x31,0x2f,0x00,0xf0,0x05,0x01,0x02,0x00]

-image_msaa_load v5, v[1:2], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D d16
-// GFX10: encoding: [0x09,0x01,0x00,0xf0,0x01,0x05,0x02,0x80]
+image_msaa_load v5, v[1:3], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA d16
+// GFX10: encoding: [0x31,0x01,0x00,0xf0,0x01,0x05,0x02,0x80]

-image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
-// GFX10: encoding: [0x01,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00]
+image_msaa_load v[1:4], v[5:8], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
+// GFX10: encoding: [0x39,0x0f,0x00,0xf0,0x05,0x01,0x02,0x00]

 image_msaa_load v14, [v204,v11,v14,v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
 // GFX10: encoding: [0x3b,0x01,0x00,0xf0,0xcc,0x0e,0x0a,0x00,0x0b,0x0e,0x13,0x00]