implement SplitVecOp_CONCAT_VECTORS, fixing the included testcase with SSE1.

llvm-svn: 112171
This commit is contained in:
Chris Lattner 2010-08-26 05:51:22 +00:00
parent 4cec44975e
commit eb2cc0ce0e
4 changed files with 100 additions and 68 deletions

View File

@ -581,6 +581,7 @@ private:
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp

View File

@ -983,6 +983,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@ -1091,8 +1092,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
return SDValue(DAG.UpdateNodeOperands(N, Hi,
DAG.getConstant(IdxVal - LoElts,
Idx.getValueType())),
0);
Idx.getValueType())), 0);
}
// Store the vector to the stack.
@ -1113,7 +1113,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
assert(N->isUnindexed() && "Indexed store of vector?");
assert(OpNo == 1 && "Can only split the stored value");
DebugLoc dl = N->getDebugLoc();
DebugLoc DL = N->getDebugLoc();
bool isTruncating = N->isTruncatingStore();
SDValue Ch = N->getChain();
@ -1132,25 +1132,49 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
LoMemVT, isVol, isNT, Alignment);
else
Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
isVol, isNT, Alignment);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
SVOffset += IncrementSize;
if (isTruncating)
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
HiMemVT, isVol, isNT, Alignment);
else
Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
isVol, isNT, Alignment);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
DebugLoc DL = N->getDebugLoc();
// The input operands all must have the same type, and we know the result the
// result type is valid. Convert this to a buildvector which extracts all the
// input elements.
// TODO: If the input elements are power-two vectors, we could convert this to
// a new CONCAT_VECTORS node with elements that are half-wide.
SmallVector<SDValue, 32> Elts;
EVT EltVT = N->getValueType(0).getVectorElementType();
for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
SDValue Op = N->getOperand(op);
for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
i != e; ++i) {
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
Op, DAG.getIntPtrConstant(i)));
}
}
return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
&Elts[0], Elts.size());
}
@ -2223,25 +2247,24 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Check if we can load the element with one instruction
if (LdWidth <= NewVTWidth) {
if (NewVT.isVector()) {
if (NewVT != WidenVT) {
assert(WidenWidth % NewVTWidth == 0);
unsigned NumConcat = WidenWidth / NewVTWidth;
SmallVector<SDValue, 16> ConcatOps(NumConcat);
SDValue UndefVal = DAG.getUNDEF(NewVT);
ConcatOps[0] = LdOp;
for (unsigned i = 1; i != NumConcat; ++i)
ConcatOps[i] = UndefVal;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
NumConcat);
} else
return LdOp;
} else {
if (!NewVT.isVector()) {
unsigned NumElts = WidenWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
}
if (NewVT == WidenVT)
return LdOp;
assert(WidenWidth % NewVTWidth == 0);
unsigned NumConcat = WidenWidth / NewVTWidth;
SmallVector<SDValue, 16> ConcatOps(NumConcat);
SDValue UndefVal = DAG.getUNDEF(NewVT);
ConcatOps[0] = LdOp;
for (unsigned i = 1; i != NumConcat; ++i)
ConcatOps[i] = UndefVal;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
NumConcat);
}
// Load vector by using multiple loads from largest vector to scalar
@ -2274,52 +2297,55 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Build the vector from the loads operations
unsigned End = LdOps.size();
if (LdOps[0].getValueType().isVector()) {
// If the load contains vectors, build the vector using concat vector.
// All of the vectors used to loads are power of 2 and the scalars load
// can be combined to make a power of 2 vector.
SmallVector<SDValue, 16> ConcatOps(End);
int i = End - 1;
int Idx = End;
EVT LdTy = LdOps[i].getValueType();
// First combine the scalar loads to a vector
if (!LdTy.isVector()) {
for (--i; i >= 0; --i) {
LdTy = LdOps[i].getValueType();
if (LdTy.isVector())
break;
}
ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
if (!LdOps[0].getValueType().isVector())
// All the loads are scalar loads.
return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
// If the load contains vectors, build the vector using concat vector.
// All of the vectors used to loads are power of 2 and the scalars load
// can be combined to make a power of 2 vector.
SmallVector<SDValue, 16> ConcatOps(End);
int i = End - 1;
int Idx = End;
EVT LdTy = LdOps[i].getValueType();
// First combine the scalar loads to a vector
if (!LdTy.isVector()) {
for (--i; i >= 0; --i) {
LdTy = LdOps[i].getValueType();
if (LdTy.isVector())
break;
}
ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
}
ConcatOps[--Idx] = LdOps[i];
for (--i; i >= 0; --i) {
EVT NewLdTy = LdOps[i].getValueType();
if (NewLdTy != LdTy) {
// Create a larger vector
ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
&ConcatOps[Idx], End - Idx);
Idx = End - 1;
LdTy = NewLdTy;
}
ConcatOps[--Idx] = LdOps[i];
for (--i; i >= 0; --i) {
EVT NewLdTy = LdOps[i].getValueType();
if (NewLdTy != LdTy) {
// Create a larger vector
ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
&ConcatOps[Idx], End - Idx);
Idx = End - 1;
LdTy = NewLdTy;
}
ConcatOps[--Idx] = LdOps[i];
}
}
if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) {
// We need to fill the rest with undefs to build the vector
unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
SmallVector<SDValue, 16> WidenOps(NumOps);
SDValue UndefVal = DAG.getUNDEF(LdTy);
unsigned i = 0;
for (; i != End-Idx; ++i)
WidenOps[i] = ConcatOps[Idx+i];
for (; i != NumOps; ++i)
WidenOps[i] = UndefVal;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
} else
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
&ConcatOps[Idx], End - Idx);
} else // All the loads are scalar loads.
return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
&ConcatOps[Idx], End - Idx);
// We need to fill the rest with undefs to build the vector
unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
SmallVector<SDValue, 16> WidenOps(NumOps);
SDValue UndefVal = DAG.getUNDEF(LdTy);
{
unsigned i = 0;
for (; i != End-Idx; ++i)
WidenOps[i] = ConcatOps[Idx+i];
for (; i != NumOps; ++i)
WidenOps[i] = UndefVal;
}
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
}
SDValue

View File

@ -1320,9 +1320,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now.
if (ValVT == MVT::f64 &&
(Subtarget->is64Bit() && !Subtarget->hasSSE2())) {
(Subtarget->is64Bit() && !Subtarget->hasSSE2()))
report_fatal_error("SSE2 register return with SSE2 disabled");
}
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.

View File

@ -6,3 +6,9 @@ define <8 x i16> @test1(<8 x i32> %a) nounwind {
; CHECK: test1
ret <8 x i16> zeroinitializer
}
define <8 x i16> @test2(<8 x i32> %a) nounwind {
; CHECK: test2
%c = trunc <8 x i32> %a to <8 x i16> ; <<8 x i16>> [#uses=1]
ret <8 x i16> %c
}