forked from OSchip/llvm-project
R600: Add support for vector local memory loads
llvm-svn: 189226
This commit is contained in:
parent
c6f4a29ed5
commit
35bb18c2a7
|
@ -424,6 +424,29 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
|
||||||
return Op;
|
return Op;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// \brief Split a vector load into one scalar load per element.
///
/// Each element is loaded from the base pointer plus the element's byte
/// offset, using the extension type of the original load, and the results are
/// reassembled with a BUILD_VECTOR node.
///
/// \param Op  The load to split. It must be a LoadSDNode whose memory type is
///            a vector.
/// \param DAG The SelectionDAG in which the new nodes are created.
/// \returns A BUILD_VECTOR of type Op.getValueType(). Only the vector value is
///          returned; the output chains of the element loads are not.
SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
                                              SelectionDAG &DAG) const {
  // The node is required to be a load, so use cast<> (which asserts) rather
  // than dyn_cast<>, whose null result would be dereferenced below.
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
  EVT EltVT = Op.getValueType().getVectorElementType();
  EVT PtrVT = Load->getBasePtr().getValueType();
  unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
  // Size in bytes of one element as stored in memory (loop invariant).
  unsigned MemEltSize = MemEltVT.getSizeInBits() / 8;
  SmallVector<SDValue, 8> Loads;
  SDLoc SL(Op);

  for (unsigned i = 0, e = NumElts; i != e; ++i) {
    unsigned Offset = i * MemEltSize;
    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
                              DAG.getConstant(Offset, PtrVT));

    // An element at a non-zero offset is only guaranteed the largest power of
    // two that divides both the original alignment and the offset, i.e. the
    // lowest set bit of (alignment | offset).
    unsigned AlignOrOffset = Load->getAlignment() | Offset;
    unsigned EltAlign = AlignOrOffset & (~AlignOrOffset + 1);

    // Describe the exact location each scalar load accesses by offsetting the
    // original pointer info instead of reusing the vector's base location.
    Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
                        Load->getChain(), Ptr,
                        Load->getPointerInfo().getWithOffset(Offset),
                        MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
                        EltAlign));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
                     Loads.size());
}
|
||||||
|
|
||||||
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
|
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
|
||||||
SelectionDAG &DAG) const {
|
SelectionDAG &DAG) const {
|
||||||
StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
|
StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
|
||||||
|
|
|
@ -50,6 +50,8 @@ protected:
|
||||||
unsigned Reg, EVT VT) const;
|
unsigned Reg, EVT VT) const;
|
||||||
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
|
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
|
||||||
SelectionDAG &DAG) const;
|
SelectionDAG &DAG) const;
|
||||||
|
/// \brief Split a vector load into multiple scalar loads.
|
||||||
|
SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||||
bool isHWTrueValue(SDValue Op) const;
|
bool isHWTrueValue(SDValue Op) const;
|
||||||
bool isHWFalseValue(SDValue Op) const;
|
bool isHWFalseValue(SDValue Op) const;
|
||||||
|
|
|
@ -1155,6 +1155,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
|
||||||
SDValue Ptr = Op.getOperand(1);
|
SDValue Ptr = Op.getOperand(1);
|
||||||
SDValue LoweredLoad;
|
SDValue LoweredLoad;
|
||||||
|
|
||||||
|
if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
|
||||||
|
SDValue MergedValues[2] = {
|
||||||
|
SplitVectorLoad(Op, DAG),
|
||||||
|
Chain
|
||||||
|
};
|
||||||
|
return DAG.getMergeValues(MergedValues, 2, DL);
|
||||||
|
}
|
||||||
|
|
||||||
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
|
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
|
||||||
if (ConstantBlock > -1) {
|
if (ConstantBlock > -1) {
|
||||||
SDValue Result;
|
SDValue Result;
|
||||||
|
|
|
@ -66,6 +66,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||||
|
|
||||||
setOperationAction(ISD::BITCAST, MVT::i128, Legal);
|
setOperationAction(ISD::BITCAST, MVT::i128, Legal);
|
||||||
|
|
||||||
|
// We need to custom lower vector loads from local memory
|
||||||
|
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
|
||||||
|
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
|
||||||
|
|
||||||
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
|
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
|
||||||
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
|
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
|
||||||
|
|
||||||
|
@ -368,6 +372,19 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||||
switch (Op.getOpcode()) {
|
switch (Op.getOpcode()) {
|
||||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||||
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
||||||
|
case ISD::LOAD: {
|
||||||
|
LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
|
||||||
|
if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
|
||||||
|
Op.getValueType().isVector()) {
|
||||||
|
SDValue MergedValues[2] = {
|
||||||
|
SplitVectorLoad(Op, DAG),
|
||||||
|
Load->getChain()
|
||||||
|
};
|
||||||
|
return DAG.getMergeValues(MergedValues, 2, SDLoc(Op));
|
||||||
|
} else {
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
}
|
||||||
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
||||||
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
|
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
|
||||||
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
|
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
|
||||||
|
|
|
@ -516,3 +516,17 @@ entry:
|
||||||
store float %0, float addrspace(1)* %out
|
store float %0, float addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; load a v2f32 value from the local address space
|
||||||
|
; R600-CHECK: @load_v2f32_local
|
||||||
|
; R600-CHECK: LDS_READ_RET
|
||||||
|
; R600-CHECK: LDS_READ_RET
|
||||||
|
; SI-CHECK: @load_v2f32_local
|
||||||
|
; SI-CHECK: DS_READ_B32
|
||||||
|
; SI-CHECK: DS_READ_B32
|
||||||
|
; Test kernel: loads a <2 x float> from %in (addrspace(3)) and stores it to %out (addrspace(1)).
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
|
||||||
|
entry:
|
||||||
|
%0 = load <2 x float> addrspace(3)* %in
|
||||||
|
store <2 x float> %0, <2 x float> addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue