From 75ff0534973ae5768ef955b56434a007dfc58abf Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Wed, 25 Aug 2010 22:49:25 +0000 Subject: [PATCH] Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0 XMM0 XMM0 insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 --- llvm/include/llvm/Target/TargetLowering.h | 59 ++++++++++--- .../SelectionDAG/SelectionDAGBuilder.cpp | 61 +++++++++++--- .../CodeGen/SelectionDAG/TargetLowering.cpp | 82 +++++++++++-------- llvm/test/CodeGen/X86/v2f32.ll | 57 ++++++++++--- llvm/test/CodeGen/X86/widen_shuffle-1.ll | 8 +- 5 files changed, 193 insertions(+), 74 deletions(-) diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index 197b8d1eb47e..0dc9d4e0a8b7 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -214,24 +214,59 @@ public: /// ValueTypeActions - For each value type, keep a LegalizeAction enum /// that indicates how instruction selection should deal with the type. uint8_t ValueTypeActions[MVT::LAST_VALUETYPE]; + + LegalizeAction getExtendedTypeAction(EVT VT) const { + // Handle non-vector integers. + if (!VT.isVector()) { + assert(VT.isInteger() && "Unsupported extended type!"); + unsigned BitSize = VT.getSizeInBits(); + // First promote to a power-of-two size, then expand if necessary. + if (BitSize < 8 || !isPowerOf2_32(BitSize)) + return Promote; + return Expand; + } + + // If this is a type smaller than a legal vector type, promote to that + // type, e.g. <2 x float> -> <4 x float>. + if (VT.getVectorElementType().isSimple() && + VT.getVectorNumElements() != 1) { + MVT EltType = VT.getVectorElementType().getSimpleVT(); + unsigned NumElts = VT.getVectorNumElements(); + while (1) { + // Round up to the nearest power of 2. + NumElts = (unsigned)NextPowerOf2(NumElts); + + MVT LargerVector = MVT::getVectorVT(EltType, NumElts); + if (LargerVector == MVT()) break; + + // If this the larger type is legal, promote to it. + if (getTypeAction(LargerVector) == Legal) return Promote; + } + } + + return VT.isPow2VectorType() ? Expand : Promote; + } public: ValueTypeActionImpl() { std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0); } + + /// FIXME: This Context argument is now dead, zap it. LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const { - if (VT.isExtended()) { - if (VT.isVector()) { - return VT.isPow2VectorType() ? Expand : Promote; - } - if (VT.isInteger()) - // First promote to a power-of-two size, then expand if necessary. - return VT == VT.getRoundIntegerType(Context) ? Expand : Promote; - assert(0 && "Unsupported extended type!"); - return Legal; - } - unsigned I = VT.getSimpleVT().SimpleTy; - return (LegalizeAction)ValueTypeActions[I]; + return getTypeAction(VT); } + + LegalizeAction getTypeAction(EVT VT) const { + if (!VT.isExtended()) + return getTypeAction(VT.getSimpleVT()); + return getExtendedTypeAction(VT); + } + + LegalizeAction getTypeAction(MVT VT) const { + return (LegalizeAction)ValueTypeActions[VT.SimpleTy]; + } + + void setTypeAction(EVT VT, LegalizeAction Action) { unsigned I = VT.getSimpleVT().SimpleTy; ValueTypeActions[I] = Action; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index a8a8ecb4c131..35e752021372 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -252,8 +252,21 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, if (PartVT == ValueVT) return Val; - if (PartVT.isVector()) // Vector/Vector bitcast. + if (PartVT.isVector()) { + // If the element type of the source/dest vectors are the same, but the + // parts vector has more elements than the value vector, then we have a + // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the + // elements we want. + if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) { + assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + "Cannot narrow, it would be a lossy transformation"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getIntPtrConstant(0)); + } + + // Vector/Vector bitcast. return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); + } assert(ValueVT.getVectorElementType() == PartVT && ValueVT.getVectorNumElements() == 1 && @@ -392,16 +405,39 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (NumParts == 1) { - if (PartVT != ValueVT) { - if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { - Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); - } else { - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && - "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, DAG.getIntPtrConstant(0)); - } + if (PartVT == ValueVT) { + // Nothing to do. + } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { + // Bitconvert vector->vector case. + Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + } else if (PartVT.isVector() && + PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { + EVT ElementVT = PartVT.getVectorElementType(); + // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in + // undef elements. + SmallVector Ops; + for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + ElementVT, Val, DAG.getIntPtrConstant(i))); + + for (unsigned i = ValueVT.getVectorNumElements(), + e = PartVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getUNDEF(ElementVT)); + + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); + + // FIXME: Use CONCAT for 2x -> 4x. + + //SDValue UndefElts = DAG.getUNDEF(VectorTy); + //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); + } else { + // Vector -> scalar conversion. + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial vector-to-scalar conversions should get here!"); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + PartVT, Val, DAG.getIntPtrConstant(0)); } Parts[0] = Val; @@ -428,8 +464,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - IntermediateVT, Val, - DAG.getIntPtrConstant(i)); + IntermediateVT, Val, DAG.getIntPtrConstant(i)); } // Split the intermediate operands into legal parts. diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9772d55449e9..077fd1dadd11 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -697,6 +697,7 @@ TargetLowering::findRepresentativeClass(EVT VT) const { return std::make_pair(BestRC, 1); } + /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. void TargetLowering::computeRegisterProperties() { @@ -782,6 +783,28 @@ void TargetLowering::computeRegisterProperties() { MVT VT = (MVT::SimpleValueType)i; if (isTypeLegal(VT)) continue; + // Determine if there is a legal wider type. If so, we should promote to + // that wider vector type. + EVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + if (NElts != 1) { + bool IsLegalWiderType = false; + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + EVT SVT = (MVT::SimpleValueType)nVT; + if (SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts && + isTypeSynthesizable(SVT)) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, Promote); + IsLegalWiderType = true; + break; + } + } + if (IsLegalWiderType) continue; + } + MVT IntermediateVT; EVT RegisterVT; unsigned NumIntermediates; @@ -790,30 +813,14 @@ void TargetLowering::computeRegisterProperties() { RegisterVT, this); RegisterTypeForVT[i] = RegisterVT; - // Determine if there is a legal wider type. - bool IsLegalWiderType = false; - EVT EltVT = VT.getVectorElementType(); - unsigned NElts = VT.getVectorNumElements(); - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - EVT SVT = (MVT::SimpleValueType)nVT; - if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && NElts != 1) { - TransformToType[i] = SVT; - ValueTypeActions.setTypeAction(VT, Promote); - IsLegalWiderType = true; - break; - } - } - if (!IsLegalWiderType) { - EVT NVT = VT.getPow2VectorType(); - if (NVT == VT) { - // Type is already a power of 2. The default action is to split. - TransformToType[i] = MVT::Other; - ValueTypeActions.setTypeAction(VT, Expand); - } else { - TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, Promote); - } + EVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + ValueTypeActions.setTypeAction(VT, Expand); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, Promote); } } @@ -857,8 +864,21 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, EVT &RegisterVT) const { - // Figure out the right, legal destination reg to copy into. unsigned NumElts = VT.getVectorNumElements(); + + // If there is a wider vector type with the same element type as this one, + // we should widen to that legal vector type. This handles things like + // <2 x float> -> <4 x float>. + if (NumElts != 1 && getTypeAction(Context, VT) == Promote) { + RegisterVT = getTypeToTransformTo(Context, VT); + if (isTypeLegal(RegisterVT)) { + IntermediateVT = RegisterVT; + NumIntermediates = 1; + return 1; + } + } + + // Figure out the right, legal destination reg to copy into. EVT EltTy = VT.getVectorElementType(); unsigned NumVectorRegs = 1; @@ -887,16 +907,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; - if (DestVT.bitsLT(NewVT)) { - // Value is expanded, e.g. i64 -> i16. + if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - } else { - // Otherwise, promotion or legal types use the same number of registers as - // the vector decimated to the appropriate level. - return NumVectorRegs; - } - return 1; + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; } /// Get the EVTs and ArgFlags collections that represent the legalized return diff --git a/llvm/test/CodeGen/X86/v2f32.ll b/llvm/test/CodeGen/X86/v2f32.ll index 9c4b773a6190..76c3fdfc060c 100644 --- a/llvm/test/CodeGen/X86/v2f32.ll +++ b/llvm/test/CodeGen/X86/v2f32.ll @@ -10,15 +10,16 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind { store float %c, float* %P2 ret void ; X64: test1: -; X64-NEXT: addss %xmm1, %xmm0 -; X64-NEXT: movss %xmm0, (%rdi) +; X64-NEXT: pshufd $1, %xmm0, %xmm1 +; X64-NEXT: addss %xmm0, %xmm1 +; X64-NEXT: movss %xmm1, (%rdi) ; X64-NEXT: ret ; X32: test1: -; X32-NEXT: movss 4(%esp), %xmm0 -; X32-NEXT: addss 8(%esp), %xmm0 -; X32-NEXT: movl 12(%esp), %eax -; X32-NEXT: movss %xmm0, (%eax) +; X32-NEXT: pshufd $1, %xmm0, %xmm1 +; X32-NEXT: addss %xmm0, %xmm1 +; X32-NEXT: movl 4(%esp), %eax +; X32-NEXT: movss %xmm1, (%eax) ; X32-NEXT: ret } @@ -28,12 +29,42 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw ret <2 x float> %Z ; X64: test2: -; X64-NEXT: insertps $0 -; X64-NEXT: insertps $16 -; X64-NEXT: insertps $0 -; X64-NEXT: insertps $16 -; X64-NEXT: addps -; X64-NEXT: movaps -; X64-NEXT: pshufd +; X64-NEXT: addps %xmm1, %xmm0 ; X64-NEXT: ret } + + +define <2 x float> @test3(<4 x float> %A) nounwind { + %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> + %C = fadd <2 x float> %B, %B + ret <2 x float> %C +; CHECK: test3: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + +define <2 x float> @test4(<2 x float> %A) nounwind { + %C = fadd <2 x float> %A, %A + ret <2 x float> %C +; CHECK: test4: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + +define <4 x float> @test5(<4 x float> %A) nounwind { + %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> + %C = fadd <2 x float> %B, %B + br label %BB + +BB: + %D = fadd <2 x float> %C, %C + %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> + ret <4 x float> %E + +; CHECK: _test5: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + + diff --git a/llvm/test/CodeGen/X86/widen_shuffle-1.ll b/llvm/test/CodeGen/X86/widen_shuffle-1.ll index 25dde57c767e..463f522a11df 100644 --- a/llvm/test/CodeGen/X86/widen_shuffle-1.ll +++ b/llvm/test/CodeGen/X86/widen_shuffle-1.ll @@ -3,7 +3,8 @@ ; widening shuffle v3float and then a add define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: -; CHECK: insertps +; CHECK: shuf: +; CHECK: extractps ; CHECK: extractps %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2> %val = fadd <3 x float> %x, %src2 @@ -15,7 +16,8 @@ entry: ; widening shuffle v3float with a different mask and then a add define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: -; CHECK: insertps +; CHECK: shuf2: +; CHECK: extractps ; CHECK: extractps %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2> %val = fadd <3 x float> %x, %src2 @@ -26,7 +28,7 @@ entry: ; Example of when widening a v3float operation causes the DAG to replace a node ; with the operation that we are currently widening, i.e. when replacing ; opA with opB, the DAG will produce new operations with opA. -define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) { +define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind { entry: ; CHECK: pshufd %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32>