forked from OSchip/llvm-project
Change handling of illegal vector types to widen when possible instead of
expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This
affects two places in the code: handling cross-block values and handling
function return values and arguments. Since vectors are already widened by
LegalizeTypes, this gives us much better code and unblocks x86-64 ABI and
SPU ABI work.

For example, this (which is a silly example of a cross-block value):

define <4 x float> @test2(<4 x float> %A) nounwind {
  %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %C = fadd <2 x float> %B, %B
  br label %BB
BB:
  %D = fadd <2 x float> %C, %C
  %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x float> %E
}

now compiles into:

_test2:                                 ## @test2
## BB#0:
        addps   %xmm0, %xmm0
        addps   %xmm0, %xmm0
        ret

Previously it compiled into:

_test2:                                 ## @test2
## BB#0:
        addps   %xmm0, %xmm0
        pshufd  $1, %xmm0, %xmm1
                                        ## kill: XMM0<def> XMM0<kill> XMM0<def>
        insertps        $0, %xmm0, %xmm0
        insertps        $16, %xmm1, %xmm0
        addps   %xmm0, %xmm0
        ret

This implements rdar://8230384

llvm-svn: 112101
This commit is contained in:
parent
9a68bccd0c
commit
75ff053497
|
@ -214,24 +214,59 @@ public:
|
|||
/// ValueTypeActions - For each value type, keep a LegalizeAction enum
|
||||
/// that indicates how instruction selection should deal with the type.
|
||||
uint8_t ValueTypeActions[MVT::LAST_VALUETYPE];
|
||||
|
||||
LegalizeAction getExtendedTypeAction(EVT VT) const {
|
||||
// Handle non-vector integers.
|
||||
if (!VT.isVector()) {
|
||||
assert(VT.isInteger() && "Unsupported extended type!");
|
||||
unsigned BitSize = VT.getSizeInBits();
|
||||
// First promote to a power-of-two size, then expand if necessary.
|
||||
if (BitSize < 8 || !isPowerOf2_32(BitSize))
|
||||
return Promote;
|
||||
return Expand;
|
||||
}
|
||||
|
||||
// If this is a type smaller than a legal vector type, promote to that
|
||||
// type, e.g. <2 x float> -> <4 x float>.
|
||||
if (VT.getVectorElementType().isSimple() &&
|
||||
VT.getVectorNumElements() != 1) {
|
||||
MVT EltType = VT.getVectorElementType().getSimpleVT();
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
while (1) {
|
||||
// Round up to the nearest power of 2.
|
||||
NumElts = (unsigned)NextPowerOf2(NumElts);
|
||||
|
||||
MVT LargerVector = MVT::getVectorVT(EltType, NumElts);
|
||||
if (LargerVector == MVT()) break;
|
||||
|
||||
// If the larger type is legal, promote to it.
|
||||
if (getTypeAction(LargerVector) == Legal) return Promote;
|
||||
}
|
||||
}
|
||||
|
||||
return VT.isPow2VectorType() ? Expand : Promote;
|
||||
}
|
||||
public:
|
||||
ValueTypeActionImpl() {
|
||||
std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0);
|
||||
}
|
||||
|
||||
/// FIXME: This Context argument is now dead, zap it.
|
||||
LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const {
|
||||
if (VT.isExtended()) {
|
||||
if (VT.isVector()) {
|
||||
return VT.isPow2VectorType() ? Expand : Promote;
|
||||
}
|
||||
if (VT.isInteger())
|
||||
// First promote to a power-of-two size, then expand if necessary.
|
||||
return VT == VT.getRoundIntegerType(Context) ? Expand : Promote;
|
||||
assert(0 && "Unsupported extended type!");
|
||||
return Legal;
|
||||
}
|
||||
unsigned I = VT.getSimpleVT().SimpleTy;
|
||||
return (LegalizeAction)ValueTypeActions[I];
|
||||
return getTypeAction(VT);
|
||||
}
|
||||
|
||||
LegalizeAction getTypeAction(EVT VT) const {
|
||||
if (!VT.isExtended())
|
||||
return getTypeAction(VT.getSimpleVT());
|
||||
return getExtendedTypeAction(VT);
|
||||
}
|
||||
|
||||
LegalizeAction getTypeAction(MVT VT) const {
|
||||
return (LegalizeAction)ValueTypeActions[VT.SimpleTy];
|
||||
}
|
||||
|
||||
|
||||
void setTypeAction(EVT VT, LegalizeAction Action) {
|
||||
unsigned I = VT.getSimpleVT().SimpleTy;
|
||||
ValueTypeActions[I] = Action;
|
||||
|
|
|
@ -252,8 +252,21 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
|
|||
if (PartVT == ValueVT)
|
||||
return Val;
|
||||
|
||||
if (PartVT.isVector()) // Vector/Vector bitcast.
|
||||
if (PartVT.isVector()) {
|
||||
// If the element types of the source/dest vectors are the same, but the
|
||||
// parts vector has more elements than the value vector, then we have a
|
||||
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
|
||||
// elements we want.
|
||||
if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
|
||||
assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
|
||||
"Cannot narrow, it would be a lossy transformation");
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
|
||||
DAG.getIntPtrConstant(0));
|
||||
}
|
||||
|
||||
// Vector/Vector bitcast.
|
||||
return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
|
||||
}
|
||||
|
||||
assert(ValueVT.getVectorElementType() == PartVT &&
|
||||
ValueVT.getVectorNumElements() == 1 &&
|
||||
|
@ -392,16 +405,39 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
|
|||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
|
||||
if (NumParts == 1) {
|
||||
if (PartVT != ValueVT) {
|
||||
if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
|
||||
Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
|
||||
} else {
|
||||
assert(ValueVT.getVectorElementType() == PartVT &&
|
||||
ValueVT.getVectorNumElements() == 1 &&
|
||||
"Only trivial vector-to-scalar conversions should get here!");
|
||||
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
|
||||
PartVT, Val, DAG.getIntPtrConstant(0));
|
||||
}
|
||||
if (PartVT == ValueVT) {
|
||||
// Nothing to do.
|
||||
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
|
||||
// Bitconvert vector->vector case.
|
||||
Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
|
||||
} else if (PartVT.isVector() &&
|
||||
PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
|
||||
PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
|
||||
EVT ElementVT = PartVT.getVectorElementType();
|
||||
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
|
||||
// undef elements.
|
||||
SmallVector<SDValue, 16> Ops;
|
||||
for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
|
||||
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
|
||||
ElementVT, Val, DAG.getIntPtrConstant(i)));
|
||||
|
||||
for (unsigned i = ValueVT.getVectorNumElements(),
|
||||
e = PartVT.getVectorNumElements(); i != e; ++i)
|
||||
Ops.push_back(DAG.getUNDEF(ElementVT));
|
||||
|
||||
Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
|
||||
|
||||
// FIXME: Use CONCAT for 2x -> 4x.
|
||||
|
||||
//SDValue UndefElts = DAG.getUNDEF(VectorTy);
|
||||
//Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
|
||||
} else {
|
||||
// Vector -> scalar conversion.
|
||||
assert(ValueVT.getVectorElementType() == PartVT &&
|
||||
ValueVT.getVectorNumElements() == 1 &&
|
||||
"Only trivial vector-to-scalar conversions should get here!");
|
||||
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
|
||||
PartVT, Val, DAG.getIntPtrConstant(0));
|
||||
}
|
||||
|
||||
Parts[0] = Val;
|
||||
|
@ -428,8 +464,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
|
|||
DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
|
||||
else
|
||||
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
|
||||
IntermediateVT, Val,
|
||||
DAG.getIntPtrConstant(i));
|
||||
IntermediateVT, Val, DAG.getIntPtrConstant(i));
|
||||
}
|
||||
|
||||
// Split the intermediate operands into legal parts.
|
||||
|
|
|
@ -697,6 +697,7 @@ TargetLowering::findRepresentativeClass(EVT VT) const {
|
|||
return std::make_pair(BestRC, 1);
|
||||
}
|
||||
|
||||
|
||||
/// computeRegisterProperties - Once all of the register classes are added,
|
||||
/// this allows us to compute derived properties we expose.
|
||||
void TargetLowering::computeRegisterProperties() {
|
||||
|
@ -782,6 +783,28 @@ void TargetLowering::computeRegisterProperties() {
|
|||
MVT VT = (MVT::SimpleValueType)i;
|
||||
if (isTypeLegal(VT)) continue;
|
||||
|
||||
// Determine if there is a legal wider type. If so, we should promote to
|
||||
// that wider vector type.
|
||||
EVT EltVT = VT.getVectorElementType();
|
||||
unsigned NElts = VT.getVectorNumElements();
|
||||
if (NElts != 1) {
|
||||
bool IsLegalWiderType = false;
|
||||
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
|
||||
EVT SVT = (MVT::SimpleValueType)nVT;
|
||||
if (SVT.getVectorElementType() == EltVT &&
|
||||
SVT.getVectorNumElements() > NElts &&
|
||||
isTypeSynthesizable(SVT)) {
|
||||
TransformToType[i] = SVT;
|
||||
RegisterTypeForVT[i] = SVT;
|
||||
NumRegistersForVT[i] = 1;
|
||||
ValueTypeActions.setTypeAction(VT, Promote);
|
||||
IsLegalWiderType = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (IsLegalWiderType) continue;
|
||||
}
|
||||
|
||||
MVT IntermediateVT;
|
||||
EVT RegisterVT;
|
||||
unsigned NumIntermediates;
|
||||
|
@ -790,30 +813,14 @@ void TargetLowering::computeRegisterProperties() {
|
|||
RegisterVT, this);
|
||||
RegisterTypeForVT[i] = RegisterVT;
|
||||
|
||||
// Determine if there is a legal wider type.
|
||||
bool IsLegalWiderType = false;
|
||||
EVT EltVT = VT.getVectorElementType();
|
||||
unsigned NElts = VT.getVectorNumElements();
|
||||
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
|
||||
EVT SVT = (MVT::SimpleValueType)nVT;
|
||||
if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
|
||||
SVT.getVectorNumElements() > NElts && NElts != 1) {
|
||||
TransformToType[i] = SVT;
|
||||
ValueTypeActions.setTypeAction(VT, Promote);
|
||||
IsLegalWiderType = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!IsLegalWiderType) {
|
||||
EVT NVT = VT.getPow2VectorType();
|
||||
if (NVT == VT) {
|
||||
// Type is already a power of 2. The default action is to split.
|
||||
TransformToType[i] = MVT::Other;
|
||||
ValueTypeActions.setTypeAction(VT, Expand);
|
||||
} else {
|
||||
TransformToType[i] = NVT;
|
||||
ValueTypeActions.setTypeAction(VT, Promote);
|
||||
}
|
||||
EVT NVT = VT.getPow2VectorType();
|
||||
if (NVT == VT) {
|
||||
// Type is already a power of 2. The default action is to split.
|
||||
TransformToType[i] = MVT::Other;
|
||||
ValueTypeActions.setTypeAction(VT, Expand);
|
||||
} else {
|
||||
TransformToType[i] = NVT;
|
||||
ValueTypeActions.setTypeAction(VT, Promote);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -857,8 +864,21 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
|
|||
EVT &IntermediateVT,
|
||||
unsigned &NumIntermediates,
|
||||
EVT &RegisterVT) const {
|
||||
// Figure out the right, legal destination reg to copy into.
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
||||
// If there is a wider vector type with the same element type as this one,
|
||||
// we should widen to that legal vector type. This handles things like
|
||||
// <2 x float> -> <4 x float>.
|
||||
if (NumElts != 1 && getTypeAction(Context, VT) == Promote) {
|
||||
RegisterVT = getTypeToTransformTo(Context, VT);
|
||||
if (isTypeLegal(RegisterVT)) {
|
||||
IntermediateVT = RegisterVT;
|
||||
NumIntermediates = 1;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Figure out the right, legal destination reg to copy into.
|
||||
EVT EltTy = VT.getVectorElementType();
|
||||
|
||||
unsigned NumVectorRegs = 1;
|
||||
|
@ -887,16 +907,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
|
|||
|
||||
EVT DestVT = getRegisterType(Context, NewVT);
|
||||
RegisterVT = DestVT;
|
||||
if (DestVT.bitsLT(NewVT)) {
|
||||
// Value is expanded, e.g. i64 -> i16.
|
||||
if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
|
||||
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
|
||||
} else {
|
||||
// Otherwise, promotion or legal types use the same number of registers as
|
||||
// the vector decimated to the appropriate level.
|
||||
return NumVectorRegs;
|
||||
}
|
||||
|
||||
return 1;
|
||||
// Otherwise, promotion or legal types use the same number of registers as
|
||||
// the vector decimated to the appropriate level.
|
||||
return NumVectorRegs;
|
||||
}
|
||||
|
||||
/// Get the EVTs and ArgFlags collections that represent the legalized return
|
||||
|
|
|
@ -10,15 +10,16 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind {
|
|||
store float %c, float* %P2
|
||||
ret void
|
||||
; X64: test1:
|
||||
; X64-NEXT: addss %xmm1, %xmm0
|
||||
; X64-NEXT: movss %xmm0, (%rdi)
|
||||
; X64-NEXT: pshufd $1, %xmm0, %xmm1
|
||||
; X64-NEXT: addss %xmm0, %xmm1
|
||||
; X64-NEXT: movss %xmm1, (%rdi)
|
||||
; X64-NEXT: ret
|
||||
|
||||
; X32: test1:
|
||||
; X32-NEXT: movss 4(%esp), %xmm0
|
||||
; X32-NEXT: addss 8(%esp), %xmm0
|
||||
; X32-NEXT: movl 12(%esp), %eax
|
||||
; X32-NEXT: movss %xmm0, (%eax)
|
||||
; X32-NEXT: pshufd $1, %xmm0, %xmm1
|
||||
; X32-NEXT: addss %xmm0, %xmm1
|
||||
; X32-NEXT: movl 4(%esp), %eax
|
||||
; X32-NEXT: movss %xmm1, (%eax)
|
||||
; X32-NEXT: ret
|
||||
}
|
||||
|
||||
|
@ -28,12 +29,42 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw
|
|||
ret <2 x float> %Z
|
||||
|
||||
; X64: test2:
|
||||
; X64-NEXT: insertps $0
|
||||
; X64-NEXT: insertps $16
|
||||
; X64-NEXT: insertps $0
|
||||
; X64-NEXT: insertps $16
|
||||
; X64-NEXT: addps
|
||||
; X64-NEXT: movaps
|
||||
; X64-NEXT: pshufd
|
||||
; X64-NEXT: addps %xmm1, %xmm0
|
||||
; X64-NEXT: ret
|
||||
}
|
||||
|
||||
|
||||
define <2 x float> @test3(<4 x float> %A) nounwind {
|
||||
%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||
%C = fadd <2 x float> %B, %B
|
||||
ret <2 x float> %C
|
||||
; CHECK: test3:
|
||||
; CHECK-NEXT: addps %xmm0, %xmm0
|
||||
; CHECK-NEXT: ret
|
||||
}
|
||||
|
||||
define <2 x float> @test4(<2 x float> %A) nounwind {
|
||||
%C = fadd <2 x float> %A, %A
|
||||
ret <2 x float> %C
|
||||
; CHECK: test4:
|
||||
; CHECK-NEXT: addps %xmm0, %xmm0
|
||||
; CHECK-NEXT: ret
|
||||
}
|
||||
|
||||
define <4 x float> @test5(<4 x float> %A) nounwind {
|
||||
%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||
%C = fadd <2 x float> %B, %B
|
||||
br label %BB
|
||||
|
||||
BB:
|
||||
%D = fadd <2 x float> %C, %C
|
||||
%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
ret <4 x float> %E
|
||||
|
||||
; CHECK: _test5:
|
||||
; CHECK-NEXT: addps %xmm0, %xmm0
|
||||
; CHECK-NEXT: addps %xmm0, %xmm0
|
||||
; CHECK-NEXT: ret
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
; widening shuffle v3float and then a add
|
||||
define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
|
||||
entry:
|
||||
; CHECK: insertps
|
||||
; CHECK: shuf:
|
||||
; CHECK: extractps
|
||||
; CHECK: extractps
|
||||
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
|
||||
%val = fadd <3 x float> %x, %src2
|
||||
|
@ -15,7 +16,8 @@ entry:
|
|||
; widening shuffle v3float with a different mask and then a add
|
||||
define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
|
||||
entry:
|
||||
; CHECK: insertps
|
||||
; CHECK: shuf2:
|
||||
; CHECK: extractps
|
||||
; CHECK: extractps
|
||||
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
|
||||
%val = fadd <3 x float> %x, %src2
|
||||
|
@ -26,7 +28,7 @@ entry:
|
|||
; Example of when widening a v3float operation causes the DAG to replace a node
|
||||
; with the operation that we are currently widening, i.e. when replacing
|
||||
; opA with opB, the DAG will produce new operations with opA.
|
||||
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) {
|
||||
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
|
||||
entry:
|
||||
; CHECK: pshufd
|
||||
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
|
|
Loading…
Reference in New Issue