[AMDGPU] Honor !invariant.load metadata on load-like intrinsics

Differential Revision: https://reviews.llvm.org/D119739
This commit is contained in:
Jay Foad 2022-02-14 17:50:21 +00:00
parent cb654c7de2
commit f72d8897ac
2 changed files with 30 additions and 25 deletions

View File

@ -1120,6 +1120,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
unsigned IntrID) const {
Info.flags = MachineMemOperand::MONone;
if (CI.hasMetadata(LLVMContext::MD_invariant_load))
Info.flags |= MachineMemOperand::MOInvariant;
if (const AMDGPU::RsrcIntrinsic *RsrcIntr =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
AttributeList Attr = Intrinsic::getAttributes(CI.getContext(),
@ -1138,7 +1142,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MFI->getBufferPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
}
Info.flags = MachineMemOperand::MODereferenceable;
Info.flags |= MachineMemOperand::MODereferenceable;
if (Attr.hasFnAttr(Attribute::ReadOnly)) {
unsigned DMaskLanes = 4;
@ -1180,9 +1184,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID :
ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
// XXX - Should this be volatile without known ordering?
Info.flags |= MachineMemOperand::MOVolatile;
@ -1202,7 +1206,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4));
if (!Vol->isZero())
@ -1218,7 +1222,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal =
MFI->getBufferPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
Info.align.reset();
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
if (!Vol || !Vol->isZero())
@ -1232,7 +1236,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1));
if (!Vol->isZero())
@ -1245,9 +1249,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
@ -1257,8 +1261,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal =
MFI->getImagePSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
Info.align.reset();
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MODereferenceable;
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MODereferenceable;
return true;
}
case Intrinsic::amdgcn_global_atomic_fadd:
@ -1271,10 +1275,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile;
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_ds_gws_init:
@ -1294,9 +1298,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.size = 4;
Info.align = Align(4);
Info.flags = MachineMemOperand::MOStore;
if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
Info.flags = MachineMemOperand::MOLoad;
Info.flags |= MachineMemOperand::MOLoad;
else
Info.flags |= MachineMemOperand::MOStore;
return true;
}
default:

View File

@ -8,13 +8,13 @@ define amdgpu_ps void @test(<8 x i32> inreg %load, <8 x i32> inreg %store) {
; GFX9-LABEL: test:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: image_load v1, v0, s[0:7] dmask:0x1 unorm
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: image_store v1, v0, s[8:15] dmask:0x1 unorm
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: image_load v1, v0, s[0:7] dmask:0x1 unorm
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: image_store v1, v0, s[8:15] dmask:0x1 unorm
; GFX9-NEXT: v_mov_b32_e32 v1, 1
; GFX9-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm
; GFX9-NEXT: image_load v3, v1, s[0:7] dmask:0x1 unorm
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: image_store v2, v0, s[8:15] dmask:0x1 unorm
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: image_store v3, v1, s[8:15] dmask:0x1 unorm
; GFX9-NEXT: s_endpgm
%data0 = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 1, i32 0, <8 x i32> %load, i32 0, i32 0), !invariant.load !0
call void @llvm.amdgcn.image.store.1d.f32.i32(float %data0, i32 1, i32 0, <8 x i32> %store, i32 0, i32 0)