forked from OSchip/llvm-project
implement SplitVecOp_CONCAT_VECTORS, fixing the included testcase with SSE1.
llvm-svn: 112171
This commit is contained in:
parent
4cec44975e
commit
eb2cc0ce0e
|
@ -581,6 +581,7 @@ private:
|
|||
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
|
||||
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
|
||||
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Vector Widening Support: LegalizeVectorTypes.cpp
|
||||
|
|
|
@ -983,6 +983,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
|
|||
case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
|
||||
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
|
||||
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
|
||||
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
|
||||
case ISD::STORE:
|
||||
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
|
||||
break;
|
||||
|
@ -1091,8 +1092,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
|
|||
return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
|
||||
return SDValue(DAG.UpdateNodeOperands(N, Hi,
|
||||
DAG.getConstant(IdxVal - LoElts,
|
||||
Idx.getValueType())),
|
||||
0);
|
||||
Idx.getValueType())), 0);
|
||||
}
|
||||
|
||||
// Store the vector to the stack.
|
||||
|
@ -1113,7 +1113,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
|
|||
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
|
||||
assert(N->isUnindexed() && "Indexed store of vector?");
|
||||
assert(OpNo == 1 && "Can only split the stored value");
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
|
||||
bool isTruncating = N->isTruncatingStore();
|
||||
SDValue Ch = N->getChain();
|
||||
|
@ -1132,25 +1132,49 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
|
|||
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
|
||||
|
||||
if (isTruncating)
|
||||
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
|
||||
Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
|
||||
LoMemVT, isVol, isNT, Alignment);
|
||||
else
|
||||
Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
|
||||
Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
|
||||
isVol, isNT, Alignment);
|
||||
|
||||
// Increment the pointer to the other half.
|
||||
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
|
||||
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
|
||||
DAG.getIntPtrConstant(IncrementSize));
|
||||
SVOffset += IncrementSize;
|
||||
|
||||
if (isTruncating)
|
||||
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
|
||||
Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
|
||||
HiMemVT, isVol, isNT, Alignment);
|
||||
else
|
||||
Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
|
||||
Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
|
||||
isVol, isNT, Alignment);
|
||||
|
||||
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
|
||||
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
|
||||
}
|
||||
|
||||
/// Legalize a CONCAT_VECTORS node that was dispatched here from
/// SplitVectorOperand.
///
/// The input operands must all have the same type, and the result type is
/// known to be valid, so the node is rewritten as a BUILD_VECTOR whose
/// operands are every element of every input, extracted in order.
///
/// \param N the CONCAT_VECTORS node being legalized.
/// \returns the replacement BUILD_VECTOR value, typed like N's result 0.
SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
  DebugLoc DL = N->getDebugLoc();
  EVT ResVT = N->getValueType(0);
  EVT EltVT = ResVT.getVectorElementType();

  // TODO: If the input elements are power-two vectors, we could convert this to
  // a new CONCAT_VECTORS node with elements that are half-wide.
  SmallVector<SDValue, 32> Scalars;
  unsigned NumInputs = N->getNumOperands();
  for (unsigned InputNo = 0; InputNo != NumInputs; ++InputNo) {
    SDValue Input = N->getOperand(InputNo);
    unsigned NumInputElts = Input.getValueType().getVectorNumElements();

    // Pull each lane out of this input as a scalar of the result element type.
    for (unsigned EltNo = 0; EltNo != NumInputElts; ++EltNo) {
      SDValue EltIdx = DAG.getIntPtrConstant(EltNo);
      Scalars.push_back(
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Input, EltIdx));
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &Scalars[0],
                     Scalars.size());
}
|
||||
|
||||
|
||||
|
@ -2223,8 +2247,15 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
|
|||
|
||||
// Check if we can load the element with one instruction
|
||||
if (LdWidth <= NewVTWidth) {
|
||||
if (NewVT.isVector()) {
|
||||
if (NewVT != WidenVT) {
|
||||
if (!NewVT.isVector()) {
|
||||
unsigned NumElts = WidenWidth / NewVTWidth;
|
||||
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
|
||||
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
|
||||
return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
|
||||
}
|
||||
if (NewVT == WidenVT)
|
||||
return LdOp;
|
||||
|
||||
assert(WidenWidth % NewVTWidth == 0);
|
||||
unsigned NumConcat = WidenWidth / NewVTWidth;
|
||||
SmallVector<SDValue, 16> ConcatOps(NumConcat);
|
||||
|
@ -2234,14 +2265,6 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
|
|||
ConcatOps[i] = UndefVal;
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
|
||||
NumConcat);
|
||||
} else
|
||||
return LdOp;
|
||||
} else {
|
||||
unsigned NumElts = WidenWidth / NewVTWidth;
|
||||
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
|
||||
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
|
||||
return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
|
||||
}
|
||||
}
|
||||
|
||||
// Load vector by using multiple loads from largest vector to scalar
|
||||
|
@ -2274,7 +2297,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
|
|||
|
||||
// Build the vector from the load operations
|
||||
unsigned End = LdOps.size();
|
||||
if (LdOps[0].getValueType().isVector()) {
|
||||
if (!LdOps[0].getValueType().isVector())
|
||||
// All the loads are scalar loads.
|
||||
return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
|
||||
|
||||
// If the load contains vectors, build the vector using concat vector.
|
||||
// All of the vectors used to load are power of 2, and the scalar loads
|
||||
// can be combined to make a power of 2 vector.
|
||||
|
@ -2304,22 +2330,22 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
|
|||
ConcatOps[--Idx] = LdOps[i];
|
||||
}
|
||||
|
||||
if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) {
|
||||
if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
|
||||
&ConcatOps[Idx], End - Idx);
|
||||
|
||||
// We need to fill the rest with undefs to build the vector
|
||||
unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
|
||||
SmallVector<SDValue, 16> WidenOps(NumOps);
|
||||
SDValue UndefVal = DAG.getUNDEF(LdTy);
|
||||
{
|
||||
unsigned i = 0;
|
||||
for (; i != End-Idx; ++i)
|
||||
WidenOps[i] = ConcatOps[Idx+i];
|
||||
for (; i != NumOps; ++i)
|
||||
WidenOps[i] = UndefVal;
|
||||
}
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
|
||||
} else
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
|
||||
&ConcatOps[Idx], End - Idx);
|
||||
} else // All the loads are scalar loads.
|
||||
return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
|
||||
}
|
||||
|
||||
SDValue
|
||||
|
|
|
@ -1320,9 +1320,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
|
|||
// llvm-gcc has never done it right and no one has noticed, so this
|
||||
// should be OK for now.
|
||||
if (ValVT == MVT::f64 &&
|
||||
(Subtarget->is64Bit() && !Subtarget->hasSSE2())) {
|
||||
(Subtarget->is64Bit() && !Subtarget->hasSSE2()))
|
||||
report_fatal_error("SSE2 register return with SSE2 disabled");
|
||||
}
|
||||
|
||||
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
|
||||
// the RET instruction and handled by the FP Stackifier.
|
||||
|
|
|
@ -6,3 +6,9 @@ define <8 x i16> @test1(<8 x i32> %a) nounwind {
|
|||
; CHECK: test1
|
||||
ret <8 x i16> zeroinitializer
|
||||
}
|
||||
|
||||
; Truncate an <8 x i32> argument to <8 x i16> and return the narrowed vector.
define <8 x i16> @test2(<8 x i32> %a) nounwind {
; CHECK: test2
  %narrow = trunc <8 x i32> %a to <8 x i16>
  ret <8 x i16> %narrow
}
|
||||
|
|
Loading…
Reference in New Issue