forked from OSchip/llvm-project
[AMDGPU] Honor !invariant.load metadata on load-like intrinsics
Differential Revision: https://reviews.llvm.org/D119739
This commit is contained in:
parent
cb654c7de2
commit
f72d8897ac
|
@@ -1120,6 +1120,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
const CallInst &CI,
|
||||
MachineFunction &MF,
|
||||
unsigned IntrID) const {
|
||||
Info.flags = MachineMemOperand::MONone;
|
||||
if (CI.hasMetadata(LLVMContext::MD_invariant_load))
|
||||
Info.flags |= MachineMemOperand::MOInvariant;
|
||||
|
||||
if (const AMDGPU::RsrcIntrinsic *RsrcIntr =
|
||||
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
|
||||
AttributeList Attr = Intrinsic::getAttributes(CI.getContext(),
|
||||
|
@@ -1138,7 +1142,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
MFI->getBufferPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
|
||||
}
|
||||
|
||||
Info.flags = MachineMemOperand::MODereferenceable;
|
||||
Info.flags |= MachineMemOperand::MODereferenceable;
|
||||
if (Attr.hasFnAttr(Attribute::ReadOnly)) {
|
||||
unsigned DMaskLanes = 4;
|
||||
|
||||
|
@@ -1180,9 +1184,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID :
|
||||
ISD::INTRINSIC_W_CHAIN;
|
||||
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
|
||||
Info.flags = MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable;
|
||||
Info.flags |= MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable;
|
||||
|
||||
// XXX - Should this be volatile without known ordering?
|
||||
Info.flags |= MachineMemOperand::MOVolatile;
|
||||
|
@@ -1202,7 +1206,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
Info.memVT = MVT::getVT(CI.getType());
|
||||
Info.ptrVal = CI.getOperand(0);
|
||||
Info.align.reset();
|
||||
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
|
||||
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
|
||||
|
||||
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4));
|
||||
if (!Vol->isZero())
|
||||
|
@@ -1218,7 +1222,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
Info.ptrVal =
|
||||
MFI->getBufferPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
|
||||
Info.align.reset();
|
||||
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
|
||||
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
|
||||
|
||||
const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
|
||||
if (!Vol || !Vol->isZero())
|
||||
|
@@ -1232,7 +1236,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
Info.memVT = MVT::getVT(CI.getType());
|
||||
Info.ptrVal = CI.getOperand(0);
|
||||
Info.align.reset();
|
||||
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
|
||||
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
|
||||
|
||||
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1));
|
||||
if (!Vol->isZero())
|
||||
|
@@ -1245,9 +1249,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
Info.memVT = MVT::getVT(CI.getType());
|
||||
Info.ptrVal = CI.getOperand(0);
|
||||
Info.align.reset();
|
||||
Info.flags = MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MOVolatile;
|
||||
Info.flags |= MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MOVolatile;
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
|
||||
|
@@ -1257,8 +1261,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
Info.ptrVal =
|
||||
MFI->getImagePSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
|
||||
Info.align.reset();
|
||||
Info.flags = MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MODereferenceable;
|
||||
Info.flags |= MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MODereferenceable;
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_global_atomic_fadd:
|
||||
|
@@ -1271,10 +1275,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
Info.memVT = MVT::getVT(CI.getType());
|
||||
Info.ptrVal = CI.getOperand(0);
|
||||
Info.align.reset();
|
||||
Info.flags = MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOVolatile;
|
||||
Info.flags |= MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOVolatile;
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_ds_gws_init:
|
||||
|
@@ -1294,9 +1298,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
Info.size = 4;
|
||||
Info.align = Align(4);
|
||||
|
||||
Info.flags = MachineMemOperand::MOStore;
|
||||
if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
|
||||
Info.flags = MachineMemOperand::MOLoad;
|
||||
Info.flags |= MachineMemOperand::MOLoad;
|
||||
else
|
||||
Info.flags |= MachineMemOperand::MOStore;
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
|
|
|
@@ -8,13 +8,13 @@ define amdgpu_ps void @test(<8 x i32> inreg %load, <8 x i32> inreg %store) {
|
|||
; GFX9-LABEL: test:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: image_load v1, v0, s[0:7] dmask:0x1 unorm
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: image_store v1, v0, s[8:15] dmask:0x1 unorm
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 1
|
||||
; GFX9-NEXT: image_load v1, v0, s[0:7] dmask:0x1 unorm
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: image_store v1, v0, s[8:15] dmask:0x1 unorm
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 1
|
||||
; GFX9-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm
|
||||
; GFX9-NEXT: image_load v3, v1, s[0:7] dmask:0x1 unorm
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(1)
|
||||
; GFX9-NEXT: image_store v2, v0, s[8:15] dmask:0x1 unorm
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(1)
|
||||
; GFX9-NEXT: image_store v3, v1, s[8:15] dmask:0x1 unorm
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%data0 = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 1, i32 0, <8 x i32> %load, i32 0, i32 0), !invariant.load !0
|
||||
call void @llvm.amdgcn.image.store.1d.f32.i32(float %data0, i32 1, i32 0, <8 x i32> %store, i32 0, i32 0)
|
||||
|
|
Loading…
Reference in New Issue