[DAG] Convert truncstore(extend(x)) back to store(x)
Pulled out of D106237, this folds truncstore(extend(x)) back to store(x) if the original store was legal. The pattern can come up because of the order in which nodes are folded. A fold in the X86 backend needs to be adjusted to prevent infinite loops, so that it picks the operand of the trunc directly.

Differential Revision: https://reviews.llvm.org/D117901
parent 0283b07746
commit b27e5459d5
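As a schematic illustration of the pattern (not part of the patch; the function below mirrors the AMDGPU test updated at the end of this diff, and its name is illustrative): while legalizing an unaligned <3 x i8> copy, the combiner can be left with a truncating store whose value operand is an extend of a node that already has the memory type, and the new fold turns such a node back into a plain store when that store is legal or custom on the target.

; Reduced in the spirit of test_copy_v3i8_align1 below; not taken verbatim
; from the patch.
define void @copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) {
  %val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 1
  store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 1
  ret void
}
; While the <3 x i8> store is split into smaller pieces, a node such as
; (truncstore i8 (any_extend t0:i8), p) can appear; with this change it is
; rewritten to (store i8 t0, p). The updated VI check lines at the end of
; this diff reflect the resulting codegen.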
@@ -18396,6 +18396,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       Value.getValueType().isInteger() &&
       (!isa<ConstantSDNode>(Value) ||
        !cast<ConstantSDNode>(Value)->isOpaque())) {
+    // Convert a truncating store of a extension into a standard store.
+    if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
+         Value.getOpcode() == ISD::SIGN_EXTEND ||
+         Value.getOpcode() == ISD::ANY_EXTEND) &&
+        Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
+        TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
+      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+                          ST->getMemOperand());
+
     APInt TruncDemandedBits =
         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                              ST->getMemoryVT().getScalarSizeInBits());
@@ -48170,7 +48170,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
       St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
       TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) &&
       St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) {
-    SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue());
+    SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32,
+                              St->getValue().getOperand(0));
     return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
                              MVT::v16i8, St->getMemOperand());
   }
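To see why the X86 fold has to extend the trunc's operand directly, consider a store of a v16i8 truncation of a v16i16 value (a sketch assuming an AVX-512F target without BWI, where a v16i32 -> v16i8 truncating store is legal; the function name and typed pointer are illustrative):

define void @store_trunc_v16i16(<16 x i16> %x, <16 x i8>* %p) {
  %t = trunc <16 x i16> %x to <16 x i8>
  store <16 x i8> %t, <16 x i8>* %p
  ret void
}
; combineStore widens this into a v16i32 truncating store. If it built the
; wide value as any_extend(%t), the extend's operand would have the memory
; type v16i8, so the new generic fold above would turn the node back into a
; plain v16i8 store, which combineStore would widen again, looping forever.
; Any-extending the trunc's v16i16 operand instead avoids the loop, since
; v16i16 != v16i8.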
@@ -543,17 +543,16 @@ define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    s_mov_b32 s8, s2
 ; VI-NEXT:    s_mov_b32 s9, s3
-; VI-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
-; VI-NEXT:    buffer_load_ubyte v1, off, s[8:11], 0 offset:1
-; VI-NEXT:    buffer_load_ubyte v2, off, s[8:11], 0 offset:2
+; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
+; VI-NEXT:    buffer_load_ubyte v1, off, s[8:11], 0 offset:2
 ; VI-NEXT:    s_mov_b32 s4, s0
 ; VI-NEXT:    s_mov_b32 s5, s1
-; VI-NEXT:    s_waitcnt vmcnt(2)
+; VI-NEXT:    s_waitcnt vmcnt(1)
 ; VI-NEXT:    buffer_store_byte v0, off, s[4:7], 0
-; VI-NEXT:    s_waitcnt vmcnt(2)
-; VI-NEXT:    buffer_store_byte v1, off, s[4:7], 0 offset:1
-; VI-NEXT:    s_waitcnt vmcnt(2)
-; VI-NEXT:    buffer_store_byte v2, off, s[4:7], 0 offset:2
+; VI-NEXT:    s_waitcnt vmcnt(1)
+; VI-NEXT:    buffer_store_byte v1, off, s[4:7], 0 offset:2
+; VI-NEXT:    v_lshrrev_b16_e32 v0, 8, v0
+; VI-NEXT:    buffer_store_byte v0, off, s[4:7], 0 offset:1
 ; VI-NEXT:    s_endpgm
   %val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 1
   store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 1