[DAG] Convert truncstore(extend(x)) back to store(x)

Pulled out of D106237, this folds truncstore(extend(x)) back to store(x)
if the original store was legal. This can come up due to the order we
fold nodes. A fold from X86 needs to be adjusted to prevent infinite
loops, to have it pick the operand of a trunc more directly.

Differential Revision: https://reviews.llvm.org/D117901
This commit is contained in:
David Green 2022-01-22 13:20:36 +00:00
parent 0283b07746
commit b27e5459d5
3 changed files with 18 additions and 9 deletions

View File

@ -18396,6 +18396,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Value.getValueType().isInteger() &&
(!isa<ConstantSDNode>(Value) ||
!cast<ConstantSDNode>(Value)->isOpaque())) {
// Convert a truncating store of a extension into a standard store.
if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
Value.getOpcode() == ISD::SIGN_EXTEND ||
Value.getOpcode() == ISD::ANY_EXTEND) &&
Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
ST->getMemOperand());
APInt TruncDemandedBits =
APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
ST->getMemoryVT().getScalarSizeInBits());

View File

@ -48170,7 +48170,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) &&
St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) {
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue());
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32,
St->getValue().getOperand(0));
return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
MVT::v16i8, St->getMemOperand());
}

View File

@ -543,17 +543,16 @@ define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s8, s2
; VI-NEXT: s_mov_b32 s9, s3
; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1
; VI-NEXT: buffer_load_ubyte v2, off, s[8:11], 0 offset:2
; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:2
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: s_waitcnt vmcnt(1)
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:1
; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: buffer_store_byte v2, off, s[4:7], 0 offset:2
; VI-NEXT: s_waitcnt vmcnt(1)
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:2
; VI-NEXT: v_lshrrev_b16_e32 v0, 8, v0
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 offset:1
; VI-NEXT: s_endpgm
%val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 1
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 1