AMDGPU: Use unique PSVs for buffer resources

Also fixes using the wrong memory type for some
intrinsics when custom lowering them.

llvm-svn: 321557
This commit is contained in:
Matt Arsenault 2017-12-29 17:18:21 +00:00
parent d94b63d765
commit e19bc2ee0f
4 changed files with 93 additions and 45 deletions

View File

@ -462,7 +462,7 @@ class AMDGPUBufferLoad : Intrinsic <
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[IntrReadMem]>;
[IntrReadMem], "", [SDNPMemOperand]>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;
@ -474,7 +474,7 @@ class AMDGPUBufferStore : Intrinsic <
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[IntrWriteMem]>;
[IntrWriteMem], "", [SDNPMemOperand]>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore;
@ -489,7 +489,7 @@ def int_amdgcn_tbuffer_load : Intrinsic <
llvm_i32_ty, // nfmt(imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[]>;
[IntrReadMem], "", [SDNPMemOperand]>;
def int_amdgcn_tbuffer_store : Intrinsic <
[],
@ -503,7 +503,7 @@ def int_amdgcn_tbuffer_store : Intrinsic <
llvm_i32_ty, // nfmt(imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[]>;
[IntrWriteMem], "", [SDNPMemOperand]>;
class AMDGPUBufferAtomic : Intrinsic <
[llvm_i32_ty],
@ -512,7 +512,7 @@ class AMDGPUBufferAtomic : Intrinsic <
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty], // slc(imm)
[]>;
[], "", [SDNPMemOperand]>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
@ -531,7 +531,7 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty], // slc(imm)
[]>;
[], "", [SDNPMemOperand]>;
// Uses that do not set the done bit should set IntrWriteMem on the
// call site.

View File

@ -726,6 +726,70 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags |= MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_tbuffer_load:
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(0));
Info.memVT = MVT::getVT(CI.getType());
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MODereferenceable;
// There is a constant offset component, but there are additional register
// offsets which could break AA if we set the offset to anything non-0.
return true;
}
case Intrinsic::amdgcn_tbuffer_store:
case Intrinsic::amdgcn_buffer_store:
case Intrinsic::amdgcn_buffer_store_format: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_VOID;
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(1));
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
Info.flags = MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
return true;
}
case Intrinsic::amdgcn_buffer_atomic_swap:
case Intrinsic::amdgcn_buffer_atomic_add:
case Intrinsic::amdgcn_buffer_atomic_sub:
case Intrinsic::amdgcn_buffer_atomic_smin:
case Intrinsic::amdgcn_buffer_atomic_umin:
case Intrinsic::amdgcn_buffer_atomic_smax:
case Intrinsic::amdgcn_buffer_atomic_umax:
case Intrinsic::amdgcn_buffer_atomic_and:
case Intrinsic::amdgcn_buffer_atomic_or:
case Intrinsic::amdgcn_buffer_atomic_xor: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(1));
Info.memVT = MVT::getVT(CI.getType());
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(2));
Info.memVT = MVT::getVT(CI.getType());
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile;
return true;
}
default:
return false;
}
@ -4396,7 +4460,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
SDLoc DL(Op);
MachineFunction &MF = DAG.getMachineFunction();
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
@ -4423,21 +4486,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // glc
Op.getOperand(6) // slc
};
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(MFI->getBufferPSV()),
MachineMemOperand::MOLoad,
VT.getStoreSize(), VT.getStoreSize());
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
auto *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
M->getMemOperand());
}
case Intrinsic::amdgcn_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
@ -4451,14 +4511,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(10) // slc
};
EVT VT = Op.getOperand(2).getValueType();
EVT VT = Op.getValueType();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad,
VT.getStoreSize(), VT.getStoreSize());
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, VT, MMO);
Op->getVTList(), Ops, VT, M->getMemOperand());
}
case Intrinsic::amdgcn_buffer_atomic_swap:
case Intrinsic::amdgcn_buffer_atomic_add:
@ -4478,14 +4534,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // offset
Op.getOperand(6) // slc
};
EVT VT = Op.getOperand(3).getValueType();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile,
VT.getStoreSize(), 4);
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
unsigned Opcode = 0;
switch (IntrID) {
@ -4523,7 +4574,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
llvm_unreachable("unhandled atomic opcode");
}
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO);
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
M->getMemOperand());
}
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
@ -4536,17 +4588,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(6), // offset
Op.getOperand(7) // slc
};
EVT VT = Op.getOperand(4).getValueType();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile,
VT.getStoreSize(), 4);
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, MMO);
Op->getVTList(), Ops, VT, M->getMemOperand());
}
// Basic sample.

View File

@ -28,7 +28,6 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
BufferPSV(*(MF.getSubtarget().getInstrInfo())),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),

View File

@ -137,12 +137,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// Stack object indices for work item IDs.
std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
AMDGPUBufferPseudoSourceValue BufferPSV;
DenseMap<const Value *,
std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
DenseMap<const Value *,
std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
private:
unsigned LDSWaveSpillSize = 0;
unsigned NumUserSGPRs = 0;
@ -634,9 +633,13 @@ public:
return LDSWaveSpillSize;
}
// FIXME: These should be unique
const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
return &BufferPSV;
const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
const Value *BufferRsrc) {
assert(BufferRsrc);
auto PSV = BufferPSVs.try_emplace(
BufferRsrc,
llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
return PSV.first->second.get();
}
const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,