forked from OSchip/llvm-project
AMDGPU/R600: Serialize vector trunc stores to private AS
Add a DUMMY_CHAIN SDNode to denote stores of interest.
Bugzilla: https://llvm.org/bugs/show_bug.cgi?id=28915
Bugzilla: https://llvm.org/bugs/show_bug.cgi?id=30411
Differential Revision: https://reviews.llvm.org/D27964
llvm-svn: 292651
This commit is contained in:
parent
74694b19e0
commit
f170504c41
|
@ -3278,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
NODE_NAME_CASE(CONST_DATA_PTR)
|
||||
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
|
||||
NODE_NAME_CASE(KILL)
|
||||
NODE_NAME_CASE(DUMMY_CHAIN)
|
||||
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
|
||||
NODE_NAME_CASE(SENDMSG)
|
||||
NODE_NAME_CASE(SENDMSGHALT)
|
||||
|
|
|
@ -330,6 +330,7 @@ enum NodeType : unsigned {
|
|||
INTERP_P2,
|
||||
PC_ADD_REL_OFFSET,
|
||||
KILL,
|
||||
DUMMY_CHAIN,
|
||||
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
||||
STORE_MSKOR,
|
||||
LOAD_CONSTANT,
|
||||
|
|
|
@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode<
|
|||
// This argument to this node is a dword address.
|
||||
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
|
||||
|
||||
// Force dependencies for vector trunc stores
|
||||
def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
|
||||
|
||||
def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
|
||||
def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
|
||||
|
||||
|
|
|
@ -1120,7 +1120,10 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
|
|||
llvm_unreachable("Unsupported private trunc store");
|
||||
}
|
||||
|
||||
SDValue Chain = Store->getChain();
|
||||
SDValue OldChain = Store->getChain();
|
||||
bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
|
||||
// Skip dummy
|
||||
SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
|
||||
SDValue BasePtr = Store->getBasePtr();
|
||||
SDValue Offset = Store->getOffset();
|
||||
EVT MemVT = Store->getMemoryVT();
|
||||
|
@ -1176,7 +1179,15 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
|
|||
|
||||
// Store dword
|
||||
// TODO: Can we be smarter about MachinePointerInfo?
|
||||
return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
|
||||
SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
|
||||
|
||||
// If we are part of expanded vector, make our neighbors depend on this store
|
||||
if (VectorTrunc) {
|
||||
// Make all other vector elements depend on this store
|
||||
Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
|
||||
DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
|
||||
}
|
||||
return NewStore;
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
@ -1196,6 +1207,17 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
|||
// Neither LOCAL nor PRIVATE can do vectors at the moment
|
||||
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
|
||||
VT.isVector()) {
|
||||
if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
|
||||
// Add an extra level of chain to isolate this vector
|
||||
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
|
||||
// TODO: can the chain be replaced without creating a new store?
|
||||
SDValue NewStore = DAG.getTruncStore(
|
||||
NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
|
||||
MemVT, StoreNode->getAlignment(),
|
||||
StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
|
||||
StoreNode = cast<StoreSDNode>(NewStore);
|
||||
}
|
||||
|
||||
return scalarizeVectorStore(StoreNode, DAG);
|
||||
}
|
||||
|
||||
|
@ -1230,7 +1252,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
|||
// Put the mask in correct place
|
||||
SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
|
||||
|
||||
// Put the mask in correct place
|
||||
// Put the value bits in correct place
|
||||
SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
|
||||
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
|
||||
|
||||
|
|
|
@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
|
|||
|
||||
def MOV : R600_1OP <0x19, "MOV", []>;
|
||||
|
||||
|
||||
// This is a hack to get rid of DUMMY_CHAIN nodes.
|
||||
// Most DUMMY_CHAINs should be eliminated during legalization, but undef
|
||||
// values can sneak in some to selection.
|
||||
let isPseudo = 1, isCodeGenOnly = 1 in {
|
||||
def DUMMY_CHAIN : AMDGPUInst <
|
||||
(outs),
|
||||
(ins),
|
||||
"DUMMY_CHAIN",
|
||||
[(R600dummy_chain)]
|
||||
>;
|
||||
} // end let isPseudo = 1, isCodeGenOnly = 1
|
||||
|
||||
|
||||
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
|
||||
|
||||
class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
|
||||
|
|
|
@ -708,10 +708,11 @@ define void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8>
|
|||
; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16:
|
||||
|
||||
; EG: LDS_READ_RET
|
||||
; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: ASHR
|
||||
; EG: LDS_WRITE
|
||||
; EG: LDS_WRITE
|
||||
define void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
|
||||
|
@ -740,14 +741,15 @@ define void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8>
|
|||
|
||||
; EG: LDS_READ_RET
|
||||
; EG: LDS_READ_RET
|
||||
; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: ASHR
|
||||
; EG-DAG: ASHR
|
||||
; EG: LDS_WRITE
|
||||
; EG: LDS_WRITE
|
||||
; EG: LDS_WRITE
|
||||
|
@ -786,6 +788,11 @@ define void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x
|
|||
; EG: LDS_READ_RET
|
||||
; EG: LDS_READ_RET
|
||||
; EG: LDS_READ_RET
|
||||
; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
|
@ -798,10 +805,6 @@ define void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x
|
|||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: ASHR
|
||||
; EG-DAG: ASHR
|
||||
; EG-DAG: ASHR
|
||||
; EG-DAG: ASHR
|
||||
; EG: LDS_WRITE
|
||||
; EG: LDS_WRITE
|
||||
; EG: LDS_WRITE
|
||||
|
@ -860,6 +863,11 @@ define void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x
|
|||
; EG: LDS_READ_RET
|
||||
; EG: LDS_READ_RET
|
||||
; EG: LDS_READ_RET
|
||||
; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
|
@ -884,14 +892,6 @@ define void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x
|
|||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: BFE_INT
|
||||
; EG-DAG: ASHR
|
||||
; EG-DAG: ASHR
|
||||
; EG-DAG: ASHR
|
||||
; EG-DAG: ASHR
|
||||
; EG-DAG: ASHR
|
||||
; EG-DAG: ASHR
|
||||
; EG-DAG: ASHR
|
||||
; EG-DAG: ASHR
|
||||
; EG: LDS_WRITE
|
||||
; EG: LDS_WRITE
|
||||
; EG: LDS_WRITE
|
||||
|
|
Loading…
Reference in New Issue