[LegalizeTypes] Scalarize non-byte sized loads in WidenVecRes_LOAD and SplitVecRes_LOAD

Should fix PR42803 and PR44902

Differential Revision: https://reviews.llvm.org/D74590
This commit is contained in:
Craig Topper 2020-02-24 14:08:05 -08:00
parent 0ed4744bb5
commit a5fa778882
4 changed files with 85 additions and 31 deletions

View File

@ -1505,6 +1505,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) {
SDValue Value, NewChain;
std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
std::tie(Lo, Hi) = DAG.SplitVector(Value, dl);
ReplaceValueWith(SDValue(LD, 1), NewChain);
return;
}
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
@ -3667,6 +3675,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::LoadExtType ExtType = LD->getExtensionType();
// A vector must always be stored in memory as-is, i.e. without any padding
// between the elements, since various code depends on it, e.g. in the
// handling of a bitcast of a vector type to int, which may be done with a
// vector store followed by an integer load. A vector that does not have
// elements that are byte-sized must therefore be stored as an integer
// built out of the extracted vector elements.
if (!LD->getMemoryVT().isByteSized()) {
SDValue Value, NewChain;
std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
ReplaceValueWith(SDValue(LD, 0), Value);
ReplaceValueWith(SDValue(LD, 1), NewChain);
return SDValue();
}
SDValue Result;
SmallVector<SDValue, 16> LdChain; // Chain for the series of load
if (ExtType != ISD::NON_EXTLOAD)

View File

@ -6584,12 +6584,48 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
EVT SrcVT = LD->getMemoryVT();
EVT DstVT = LD->getValueType(0);
ISD::LoadExtType ExtType = LD->getExtensionType();
unsigned NumElem = SrcVT.getVectorNumElements();
EVT SrcEltVT = SrcVT.getScalarType();
EVT DstEltVT = LD->getValueType(0).getScalarType();
EVT DstEltVT = DstVT.getScalarType();
// A vector must always be stored in memory as-is, i.e. without any padding
// between the elements, since various code depends on it, e.g. in the
// handling of a bitcast of a vector type to int, which may be done with a
// vector store followed by an integer load. A vector that does not have
// elements that are byte-sized must therefore be stored as an integer
// built out of the extracted vector elements. Correspondingly, such a
// vector is loaded here as one integer, and each element is recovered by
// shifting and truncating that integer.
if (!SrcEltVT.isByteSized()) {
unsigned NumBits = SrcVT.getSizeInBits();
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
SDValue Load = DAG.getLoad(IntVT, SL, Chain, BasePTR, LD->getPointerInfo(),
LD->getAlignment(),
LD->getMemOperand()->getFlags(),
LD->getAAInfo());
SmallVector<SDValue, 8> Vals;
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
unsigned ShiftIntoIdx =
(DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
SDValue ShiftAmount =
DAG.getConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(), SL, IntVT);
SDValue ShiftedElt =
DAG.getNode(ISD::SRL, SL, IntVT, Load, ShiftAmount);
SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, ShiftedElt);
if (ExtType != ISD::NON_EXTLOAD) {
unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
}
Vals.push_back(Scalar);
}
SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
return std::make_pair(Value, Load.getValue(1));
}
unsigned Stride = SrcEltVT.getSizeInBits() / 8;
assert(SrcEltVT.isByteSized());
@ -6611,7 +6647,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
return std::make_pair(Value, NewChain);
}

View File

@ -121,18 +121,20 @@ define void @fun2(<8 x i32> %src, <8 x i31>* %p)
define void @fun3(<3 x i31>* %src, <3 x i31>* %p)
; CHECK-LABEL: fun3:
; CHECK: # %bb.0:
; CHECK-NEXT: llgf %r0, 3(%r2)
; CHECK-NEXT: llgf %r1, 6(%r2)
; CHECK-NEXT: llgf %r2, 0(%r2)
; CHECK-NEXT: rosbg %r1, %r0, 0, 32, 31
; CHECK-NEXT: sllg %r4, %r2, 62
; CHECK-NEXT: rosbg %r4, %r0, 0, 32, 31
; CHECK-NEXT: srlg %r0, %r4, 32
; CHECK-NEXT: st %r1, 8(%r3)
; CHECK-NEXT: sllg %r1, %r2, 30
; CHECK-NEXT: lr %r1, %r0
; CHECK-NEXT: nihh %r1, 8191
; CHECK-NEXT: stg %r1, 0(%r3)
; CHECK-NEXT: l %r0, 8(%r2)
; CHECK-NEXT: lg %r1, 0(%r2)
; CHECK-NEXT: sllg %r2, %r1, 32
; CHECK-NEXT: lr %r2, %r0
; CHECK-NEXT: srlg %r0, %r2, 62
; CHECK-NEXT: st %r2, 8(%r3)
; CHECK-NEXT: rosbg %r0, %r1, 33, 61, 34
; CHECK-NEXT: sllg %r1, %r0, 62
; CHECK-NEXT: rosbg %r1, %r2, 2, 32, 0
; CHECK-NEXT: srlg %r1, %r1, 32
; CHECK-NEXT: sllg %r0, %r0, 30
; CHECK-NEXT: lr %r0, %r1
; CHECK-NEXT: nihh %r0, 8191
; CHECK-NEXT: stg %r0, 0(%r3)
; CHECK-NEXT: br %r14
{
%tmp = load <3 x i31>, <3 x i31>* %src

View File

@ -96,27 +96,21 @@ define void @local_load_v3i1(i32 addrspace(1)* %out, i32 addrspace(1)* %in, <3 x
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movq %rdi, %r14
; CHECK-NEXT: movzbl (%rdx), %ebp
; CHECK-NEXT: movl %ebp, %eax
; CHECK-NEXT: shrl %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: movl %ebp, %ecx
; CHECK-NEXT: andl $1, %ecx
; CHECK-NEXT: movd %ecx, %xmm0
; CHECK-NEXT: pinsrd $1, %eax, %xmm0
; CHECK-NEXT: shrl $2, %ebp
; CHECK-NEXT: andl $1, %ebp
; CHECK-NEXT: pinsrd $2, %ebp, %xmm0
; CHECK-NEXT: movd %xmm0, %ebx
; CHECK-NEXT: pextrd $1, %xmm0, %r15d
; CHECK-NEXT: movb (%rdx), %al
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrb $2, %cl
; CHECK-NEXT: movzbl %al, %r15d
; CHECK-NEXT: shrb %al
; CHECK-NEXT: movzbl %al, %ebx
; CHECK-NEXT: movzbl %cl, %ebp
; CHECK-NEXT: movq %rsi, %rdi
; CHECK-NEXT: movl %ebx, %esi
; CHECK-NEXT: movl %r15d, %edx
; CHECK-NEXT: movl %r15d, %esi
; CHECK-NEXT: movl %ebx, %edx
; CHECK-NEXT: movl %ebp, %ecx
; CHECK-NEXT: callq masked_load_v3
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: movl %ebx, %esi
; CHECK-NEXT: movl %r15d, %edx
; CHECK-NEXT: movl %r15d, %esi
; CHECK-NEXT: movl %ebx, %edx
; CHECK-NEXT: movl %ebp, %ecx
; CHECK-NEXT: callq masked_store4_v3
; CHECK-NEXT: addq $8, %rsp