Change handling of illegal vector types to widen when possible instead of
expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats.  This
affects two places in the code: handling cross-block values and handling
function return values and arguments.  Since vectors are already widened by
LegalizeTypes, this gives us much better code and unblocks x86-64 ABI
and SPU ABI work.

For example, this (which is a silly example of a cross-block value):
define <4 x float> @test2(<4 x float> %A) nounwind {
 %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
 %C = fadd <2 x float> %B, %B
  br label %BB
BB:
 %D = fadd <2 x float> %C, %C
 %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ret <4 x float> %E
}

Now compiles into:

_test2:                                 ## @test2
## BB#0:
 addps %xmm0, %xmm0
 addps %xmm0, %xmm0
 ret

previously it compiled into:

_test2:                                 ## @test2
## BB#0:
 addps %xmm0, %xmm0
 pshufd $1, %xmm0, %xmm1
                                        ## kill: XMM0<def> XMM0<kill> XMM0<def>
 insertps $0, %xmm0, %xmm0
 insertps $16, %xmm1, %xmm0
 addps %xmm0, %xmm0
 ret

This implements rdar://8230384

llvm-svn: 112101
This commit is contained in:
Chris Lattner 2010-08-25 22:49:25 +00:00
parent 9a68bccd0c
commit 75ff053497
5 changed files with 193 additions and 74 deletions

View File

@ -214,24 +214,59 @@ public:
/// ValueTypeActions - For each value type, keep a LegalizeAction enum
/// that indicates how instruction selection should deal with the type.
uint8_t ValueTypeActions[MVT::LAST_VALUETYPE];
/// getExtendedTypeAction - Compute the legalize action for a value type that
/// is not looked up in the ValueTypeActions table (getTypeAction(EVT) routes
/// every VT for which VT.isExtended() is true to this helper).
///
/// Result:
///  - non-vector integer: Promote when the bit size is below 8 or is not a
///    power of two, otherwise Expand.  Non-integer scalars assert.
///  - vector with a simple element type and an element count other than 1:
///    Promote if some simple vector type with the same element type and a
///    larger power-of-two element count has action Legal.
///  - any other vector: Expand if VT.isPow2VectorType(), otherwise Promote.
LegalizeAction getExtendedTypeAction(EVT VT) const {
// Handle non-vector integers.
if (!VT.isVector()) {
assert(VT.isInteger() && "Unsupported extended type!");
unsigned BitSize = VT.getSizeInBits();
// First promote to a power-of-two size, then expand if necessary.
if (BitSize < 8 || !isPowerOf2_32(BitSize))
return Promote;
return Expand;
}
// If this is a type smaller than a legal vector type, promote to that
// type, e.g. <2 x float> -> <4 x float>.
if (VT.getVectorElementType().isSimple() &&
VT.getVectorNumElements() != 1) {
MVT EltType = VT.getVectorElementType().getSimpleVT();
unsigned NumElts = VT.getVectorNumElements();
// Probe successively larger element counts until one is legal or no
// simple vector type exists for the count.
// NOTE(review): termination relies on NextPowerOf2 returning a value
// strictly greater than its argument and on getVectorVT eventually
// yielding MVT() -- confirm against MathExtras.h / ValueTypes.h.
while (1) {
// Round up to the nearest power of 2.
NumElts = (unsigned)NextPowerOf2(NumElts);
MVT LargerVector = MVT::getVectorVT(EltType, NumElts);
// A default-constructed MVT is treated here as "no such simple vector
// type"; stop searching and fall through to the generic rule below.
if (LargerVector == MVT()) break;
// If the larger type is legal, promote to it.
if (getTypeAction(LargerVector) == Legal) return Promote;
}
}
// No legal wider vector was found (or the element type is not simple, or
// there is a single element): expand power-of-two vectors, promote others.
return VT.isPow2VectorType() ? Expand : Promote;
}
public:
ValueTypeActionImpl() {
std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0);
}
/// FIXME: This Context argument is now dead, zap it.
LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const {
if (VT.isExtended()) {
if (VT.isVector()) {
return VT.isPow2VectorType() ? Expand : Promote;
}
if (VT.isInteger())
// First promote to a power-of-two size, then expand if necessary.
return VT == VT.getRoundIntegerType(Context) ? Expand : Promote;
assert(0 && "Unsupported extended type!");
return Legal;
}
unsigned I = VT.getSimpleVT().SimpleTy;
return (LegalizeAction)ValueTypeActions[I];
return getTypeAction(VT);
}
/// getTypeAction - Return the legalize action for an arbitrary EVT.  Simple
/// types are answered by the MVT overload (a ValueTypeActions table lookup);
/// extended types are classified by getExtendedTypeAction.
LegalizeAction getTypeAction(EVT VT) const {
if (!VT.isExtended())
return getTypeAction(VT.getSimpleVT());
return getExtendedTypeAction(VT);
}
/// getTypeAction - Return the legalize action recorded for a simple value
/// type, read from ValueTypeActions indexed by VT.SimpleTy.
LegalizeAction getTypeAction(MVT VT) const {
return (LegalizeAction)ValueTypeActions[VT.SimpleTy];
}
void setTypeAction(EVT VT, LegalizeAction Action) {
unsigned I = VT.getSimpleVT().SimpleTy;
ValueTypeActions[I] = Action;

View File

@ -252,8 +252,21 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
if (PartVT == ValueVT)
return Val;
if (PartVT.isVector()) // Vector/Vector bitcast.
if (PartVT.isVector()) {
// If the element types of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
"Cannot narrow, it would be a lossy transformation");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
DAG.getIntPtrConstant(0));
}
// Vector/Vector bitcast.
return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
}
assert(ValueVT.getVectorElementType() == PartVT &&
ValueVT.getVectorNumElements() == 1 &&
@ -392,16 +405,39 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (NumParts == 1) {
if (PartVT != ValueVT) {
if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
} else {
assert(ValueVT.getVectorElementType() == PartVT &&
ValueVT.getVectorNumElements() == 1 &&
"Only trivial vector-to-scalar conversions should get here!");
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
PartVT, Val, DAG.getIntPtrConstant(0));
}
if (PartVT == ValueVT) {
// Nothing to do.
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
} else if (PartVT.isVector() &&
PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
EVT ElementVT = PartVT.getVectorElementType();
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
// undef elements.
SmallVector<SDValue, 16> Ops;
for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
ElementVT, Val, DAG.getIntPtrConstant(i)));
for (unsigned i = ValueVT.getVectorNumElements(),
e = PartVT.getVectorNumElements(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ElementVT));
Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
// FIXME: Use CONCAT for 2x -> 4x.
//SDValue UndefElts = DAG.getUNDEF(VectorTy);
//Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
} else {
// Vector -> scalar conversion.
assert(ValueVT.getVectorElementType() == PartVT &&
ValueVT.getVectorNumElements() == 1 &&
"Only trivial vector-to-scalar conversions should get here!");
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
PartVT, Val, DAG.getIntPtrConstant(0));
}
Parts[0] = Val;
@ -428,8 +464,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
else
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
IntermediateVT, Val,
DAG.getIntPtrConstant(i));
IntermediateVT, Val, DAG.getIntPtrConstant(i));
}
// Split the intermediate operands into legal parts.

View File

@ -697,6 +697,7 @@ TargetLowering::findRepresentativeClass(EVT VT) const {
return std::make_pair(BestRC, 1);
}
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLowering::computeRegisterProperties() {
@ -782,6 +783,28 @@ void TargetLowering::computeRegisterProperties() {
MVT VT = (MVT::SimpleValueType)i;
if (isTypeLegal(VT)) continue;
// Determine if there is a legal wider type. If so, we should promote to
// that wider vector type.
EVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
if (NElts != 1) {
bool IsLegalWiderType = false;
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
EVT SVT = (MVT::SimpleValueType)nVT;
if (SVT.getVectorElementType() == EltVT &&
SVT.getVectorNumElements() > NElts &&
isTypeSynthesizable(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
ValueTypeActions.setTypeAction(VT, Promote);
IsLegalWiderType = true;
break;
}
}
if (IsLegalWiderType) continue;
}
MVT IntermediateVT;
EVT RegisterVT;
unsigned NumIntermediates;
@ -790,30 +813,14 @@ void TargetLowering::computeRegisterProperties() {
RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
// Determine if there is a legal wider type.
bool IsLegalWiderType = false;
EVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
EVT SVT = (MVT::SimpleValueType)nVT;
if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
SVT.getVectorNumElements() > NElts && NElts != 1) {
TransformToType[i] = SVT;
ValueTypeActions.setTypeAction(VT, Promote);
IsLegalWiderType = true;
break;
}
}
if (!IsLegalWiderType) {
EVT NVT = VT.getPow2VectorType();
if (NVT == VT) {
// Type is already a power of 2. The default action is to split.
TransformToType[i] = MVT::Other;
ValueTypeActions.setTypeAction(VT, Expand);
} else {
TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, Promote);
}
EVT NVT = VT.getPow2VectorType();
if (NVT == VT) {
// Type is already a power of 2. The default action is to split.
TransformToType[i] = MVT::Other;
ValueTypeActions.setTypeAction(VT, Expand);
} else {
TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, Promote);
}
}
@ -857,8 +864,21 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT &IntermediateVT,
unsigned &NumIntermediates,
EVT &RegisterVT) const {
// Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
// If there is a wider vector type with the same element type as this one,
// we should widen to that legal vector type. This handles things like
// <2 x float> -> <4 x float>.
if (NumElts != 1 && getTypeAction(Context, VT) == Promote) {
RegisterVT = getTypeToTransformTo(Context, VT);
if (isTypeLegal(RegisterVT)) {
IntermediateVT = RegisterVT;
NumIntermediates = 1;
return 1;
}
}
// Figure out the right, legal destination reg to copy into.
EVT EltTy = VT.getVectorElementType();
unsigned NumVectorRegs = 1;
@ -887,16 +907,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT DestVT = getRegisterType(Context, NewVT);
RegisterVT = DestVT;
if (DestVT.bitsLT(NewVT)) {
// Value is expanded, e.g. i64 -> i16.
if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
} else {
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
return NumVectorRegs;
}
return 1;
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
return NumVectorRegs;
}
/// Get the EVTs and ArgFlags collections that represent the legalized return

View File

@ -10,15 +10,16 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind {
store float %c, float* %P2
ret void
; X64: test1:
; X64-NEXT: addss %xmm1, %xmm0
; X64-NEXT: movss %xmm0, (%rdi)
; X64-NEXT: pshufd $1, %xmm0, %xmm1
; X64-NEXT: addss %xmm0, %xmm1
; X64-NEXT: movss %xmm1, (%rdi)
; X64-NEXT: ret
; X32: test1:
; X32-NEXT: movss 4(%esp), %xmm0
; X32-NEXT: addss 8(%esp), %xmm0
; X32-NEXT: movl 12(%esp), %eax
; X32-NEXT: movss %xmm0, (%eax)
; X32-NEXT: pshufd $1, %xmm0, %xmm1
; X32-NEXT: addss %xmm0, %xmm1
; X32-NEXT: movl 4(%esp), %eax
; X32-NEXT: movss %xmm1, (%eax)
; X32-NEXT: ret
}
@ -28,12 +29,42 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw
ret <2 x float> %Z
; X64: test2:
; X64-NEXT: insertps $0
; X64-NEXT: insertps $16
; X64-NEXT: insertps $0
; X64-NEXT: insertps $16
; X64-NEXT: addps
; X64-NEXT: movaps
; X64-NEXT: pshufd
; X64-NEXT: addps %xmm1, %xmm0
; X64-NEXT: ret
}
define <2 x float> @test3(<4 x float> %A) nounwind {
%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%C = fadd <2 x float> %B, %B
ret <2 x float> %C
; CHECK: test3:
; CHECK-NEXT: addps %xmm0, %xmm0
; CHECK-NEXT: ret
}
define <2 x float> @test4(<2 x float> %A) nounwind {
%C = fadd <2 x float> %A, %A
ret <2 x float> %C
; CHECK: test4:
; CHECK-NEXT: addps %xmm0, %xmm0
; CHECK-NEXT: ret
}
define <4 x float> @test5(<4 x float> %A) nounwind {
%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%C = fadd <2 x float> %B, %B
br label %BB
BB:
%D = fadd <2 x float> %C, %C
%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
ret <4 x float> %E
; CHECK: _test5:
; CHECK-NEXT: addps %xmm0, %xmm0
; CHECK-NEXT: addps %xmm0, %xmm0
; CHECK-NEXT: ret
}

View File

@ -3,7 +3,8 @@
; widening shuffle v3float and then a add
define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
entry:
; CHECK: insertps
; CHECK: shuf:
; CHECK: extractps
; CHECK: extractps
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
%val = fadd <3 x float> %x, %src2
@ -15,7 +16,8 @@ entry:
; widening shuffle v3float with a different mask and then a add
define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
entry:
; CHECK: insertps
; CHECK: shuf2:
; CHECK: extractps
; CHECK: extractps
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
%val = fadd <3 x float> %x, %src2
@ -26,7 +28,7 @@ entry:
; Example of when widening a v3float operation causes the DAG to replace a node
; with the operation that we are currently widening, i.e. when replacing
; opA with opB, the DAG will produce new operations with opA.
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) {
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
entry:
; CHECK: pshufd
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>