forked from OSchip/llvm-project
[InstCombine] Fix for PR29124: reduce insertelements to shufflevector
If inserting more than one constant into a vector: define <4 x float> @foo(<4 x float> %x) { %ins1 = insertelement <4 x float> %x, float 1.0, i32 1 %ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2 ret <4 x float> %ins2 } InstCombine could reduce that to a shufflevector: define <4 x float> @goo(<4 x float> %x) { %shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32><i32 0, i32 5, i32 6, i32 3> ret <4 x float> %shuf } Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e. shufflevector <4 x float> %v, <4 x float> <float undef, float 1.0, float undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> -> insertelement <4 x float> %v, float 1.0, 1 Differential Revision: https://reviews.llvm.org/D24182 llvm-svn: 282237
This commit is contained in:
parent
0f8f0d369d
commit
fee9078dcd
llvm
lib/Transforms/InstCombine
test/Transforms/InstCombine
|
@ -1037,17 +1037,21 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
|||
}
|
||||
}
|
||||
|
||||
APInt UndefElts4(LHSVWidth, 0);
|
||||
APInt LHSUndefElts(LHSVWidth, 0);
|
||||
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
|
||||
UndefElts4, Depth + 1);
|
||||
LHSUndefElts, Depth + 1);
|
||||
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
|
||||
|
||||
APInt UndefElts3(LHSVWidth, 0);
|
||||
APInt RHSUndefElts(LHSVWidth, 0);
|
||||
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
|
||||
UndefElts3, Depth + 1);
|
||||
RHSUndefElts, Depth + 1);
|
||||
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
|
||||
|
||||
bool NewUndefElts = false;
|
||||
unsigned LHSIdx = -1u;
|
||||
unsigned RHSIdx = -1u;
|
||||
bool LHSUniform = true;
|
||||
bool RHSUniform = true;
|
||||
for (unsigned i = 0; i < VWidth; i++) {
|
||||
unsigned MaskVal = Shuffle->getMaskValue(i);
|
||||
if (MaskVal == -1u) {
|
||||
|
@ -1056,18 +1060,57 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
|||
NewUndefElts = true;
|
||||
UndefElts.setBit(i);
|
||||
} else if (MaskVal < LHSVWidth) {
|
||||
if (UndefElts4[MaskVal]) {
|
||||
if (LHSUndefElts[MaskVal]) {
|
||||
NewUndefElts = true;
|
||||
UndefElts.setBit(i);
|
||||
} else {
|
||||
LHSIdx = LHSIdx == -1u ? MaskVal : LHSVWidth;
|
||||
LHSUniform = LHSUniform && (MaskVal == i);
|
||||
}
|
||||
} else {
|
||||
if (UndefElts3[MaskVal - LHSVWidth]) {
|
||||
if (RHSUndefElts[MaskVal - LHSVWidth]) {
|
||||
NewUndefElts = true;
|
||||
UndefElts.setBit(i);
|
||||
} else {
|
||||
RHSIdx = RHSIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth;
|
||||
RHSUniform = RHSUniform && (MaskVal - LHSVWidth == i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to transform shuffle with constant vector and single element from
|
||||
// this constant vector to single insertelement instruction.
|
||||
// shufflevector V, C, <v1, v2, .., ci, .., vm> ->
|
||||
// insertelement V, C[ci], ci-n
|
||||
if (LHSVWidth == Shuffle->getType()->getNumElements()) {
|
||||
Value *Op = nullptr;
|
||||
Constant *Value = nullptr;
|
||||
unsigned Idx = -1u;
|
||||
|
||||
// Find constant vector wigth the single element in shuffle (LHS or RHS).
|
||||
if (LHSIdx < LHSVWidth && RHSUniform) {
|
||||
if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
|
||||
Op = Shuffle->getOperand(1);
|
||||
Value = CV->getOperand(LHSIdx);
|
||||
Idx = LHSIdx;
|
||||
}
|
||||
}
|
||||
if (RHSIdx < LHSVWidth && LHSUniform) {
|
||||
if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
|
||||
Op = Shuffle->getOperand(0);
|
||||
Value = CV->getOperand(RHSIdx);
|
||||
Idx = RHSIdx;
|
||||
}
|
||||
}
|
||||
// Found constant vector with single element - convert to insertelement.
|
||||
if (Op && Value) {
|
||||
Instruction *New = InsertElementInst::Create(
|
||||
Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
|
||||
Shuffle->getName());
|
||||
InsertNewInstWith(New, *Shuffle);
|
||||
return New;
|
||||
}
|
||||
}
|
||||
if (NewUndefElts) {
|
||||
// Add additional discovered undefs.
|
||||
SmallVector<Constant*, 16> Elts;
|
||||
|
|
|
@ -585,58 +585,103 @@ static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// insertelt (shufflevector X, CVec, Mask), C, CIndex -->
|
||||
/// shufflevector X, CVec', Mask'
|
||||
/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
|
||||
/// --> shufflevector X, CVec', Mask'
|
||||
static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
|
||||
// Bail out if the shuffle has more than one use. In that case, we'd be
|
||||
auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
|
||||
// Bail out if the parent has more than one use. In that case, we'd be
|
||||
// replacing the insertelt with a shuffle, and that's not a clear win.
|
||||
auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
|
||||
if (!Shuf || !Shuf->hasOneUse())
|
||||
if (!Inst || !Inst->hasOneUse())
|
||||
return nullptr;
|
||||
if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
|
||||
// The shuffle must have a constant vector operand. The insertelt must have
|
||||
// a constant scalar being inserted at a constant position in the vector.
|
||||
Constant *ShufConstVec, *InsEltScalar;
|
||||
uint64_t InsEltIndex;
|
||||
if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
|
||||
!match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
|
||||
!match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
|
||||
return nullptr;
|
||||
|
||||
// The shuffle must have a constant vector operand. The insertelt must have a
|
||||
// constant scalar being inserted at a constant position in the vector.
|
||||
Constant *ShufConstVec, *InsEltScalar;
|
||||
uint64_t InsEltIndex;
|
||||
if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
|
||||
!match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
|
||||
!match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
|
||||
return nullptr;
|
||||
// Adding an element to an arbitrary shuffle could be expensive, but a
|
||||
// shuffle that selects elements from vectors without crossing lanes is
|
||||
// assumed cheap.
|
||||
// If we're just adding a constant into that shuffle, it will still be
|
||||
// cheap.
|
||||
if (!isShuffleEquivalentToSelect(*Shuf))
|
||||
return nullptr;
|
||||
|
||||
// Adding an element to an arbitrary shuffle could be expensive, but a shuffle
|
||||
// that selects elements from vectors without crossing lanes is assumed cheap.
|
||||
// If we're just adding a constant into that shuffle, it will still be cheap.
|
||||
if (!isShuffleEquivalentToSelect(*Shuf))
|
||||
return nullptr;
|
||||
|
||||
// From the above 'select' check, we know that the mask has the same number of
|
||||
// elements as the vector input operands. We also know that each constant
|
||||
// input element is used in its lane and can not be used more than once by the
|
||||
// shuffle. Therefore, replace the constant in the shuffle's constant vector
|
||||
// with the insertelt constant. Replace the constant in the shuffle's mask
|
||||
// vector with the insertelt index plus the length of the vector (because the
|
||||
// constant vector operand of a shuffle is always the 2nd operand).
|
||||
Constant *Mask = Shuf->getMask();
|
||||
unsigned NumElts = Mask->getType()->getVectorNumElements();
|
||||
SmallVector<Constant*, 16> NewShufElts(NumElts);
|
||||
SmallVector<Constant*, 16> NewMaskElts(NumElts);
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
if (i == InsEltIndex) {
|
||||
NewShufElts[i] = InsEltScalar;
|
||||
Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
|
||||
NewMaskElts[i] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
|
||||
} else {
|
||||
// Copy over the existing values.
|
||||
NewShufElts[i] = ShufConstVec->getAggregateElement(i);
|
||||
NewMaskElts[i] = Mask->getAggregateElement(i);
|
||||
// From the above 'select' check, we know that the mask has the same number
|
||||
// of elements as the vector input operands. We also know that each constant
|
||||
// input element is used in its lane and can not be used more than once by
|
||||
// the shuffle. Therefore, replace the constant in the shuffle's constant
|
||||
// vector with the insertelt constant. Replace the constant in the shuffle's
|
||||
// mask vector with the insertelt index plus the length of the vector
|
||||
// (because the constant vector operand of a shuffle is always the 2nd
|
||||
// operand).
|
||||
Constant *Mask = Shuf->getMask();
|
||||
unsigned NumElts = Mask->getType()->getVectorNumElements();
|
||||
SmallVector<Constant *, 16> NewShufElts(NumElts);
|
||||
SmallVector<Constant *, 16> NewMaskElts(NumElts);
|
||||
for (unsigned I = 0; I != NumElts; ++I) {
|
||||
if (I == InsEltIndex) {
|
||||
NewShufElts[I] = InsEltScalar;
|
||||
Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
|
||||
NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
|
||||
} else {
|
||||
// Copy over the existing values.
|
||||
NewShufElts[I] = ShufConstVec->getAggregateElement(I);
|
||||
NewMaskElts[I] = Mask->getAggregateElement(I);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create new operands for a shuffle that includes the constant of the
|
||||
// original insertelt. The old shuffle will be dead now.
|
||||
Constant *NewShufVec = ConstantVector::get(NewShufElts);
|
||||
Constant *NewMask = ConstantVector::get(NewMaskElts);
|
||||
return new ShuffleVectorInst(Shuf->getOperand(0), NewShufVec, NewMask);
|
||||
// Create new operands for a shuffle that includes the constant of the
|
||||
// original insertelt. The old shuffle will be dead now.
|
||||
return new ShuffleVectorInst(Shuf->getOperand(0),
|
||||
ConstantVector::get(NewShufElts),
|
||||
ConstantVector::get(NewMaskElts));
|
||||
} else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
|
||||
// Transform sequences of insertelements ops with constant data/indexes into
|
||||
// a single shuffle op.
|
||||
unsigned NumElts = InsElt.getType()->getNumElements();
|
||||
|
||||
uint64_t InsertIdx[2];
|
||||
Constant *Val[2];
|
||||
if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
|
||||
!match(InsElt.getOperand(1), m_Constant(Val[0])) ||
|
||||
!match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
|
||||
!match(IEI->getOperand(1), m_Constant(Val[1])))
|
||||
return nullptr;
|
||||
SmallVector<Constant *, 16> Values(NumElts);
|
||||
SmallVector<Constant *, 16> Mask(NumElts);
|
||||
auto ValI = std::begin(Val);
|
||||
// Generate new constant vector and mask.
|
||||
// We have 2 values/masks from the insertelements instructions. Insert them
|
||||
// into new value/mask vectors.
|
||||
for (uint64_t I : InsertIdx) {
|
||||
if (!Values[I]) {
|
||||
assert(!Mask[I]);
|
||||
Values[I] = *ValI;
|
||||
Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
|
||||
NumElts + I);
|
||||
}
|
||||
++ValI;
|
||||
}
|
||||
// Remaining values are filled with 'undef' values.
|
||||
for (unsigned I = 0; I < NumElts; ++I) {
|
||||
if (!Values[I]) {
|
||||
assert(!Mask[I]);
|
||||
Values[I] = UndefValue::get(InsElt.getType()->getElementType());
|
||||
Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
|
||||
}
|
||||
}
|
||||
// Create new operands for a shuffle that includes the constant of the
|
||||
// original insertelt.
|
||||
return new ShuffleVectorInst(IEI->getOperand(0),
|
||||
ConstantVector::get(Values),
|
||||
ConstantVector::get(Mask));
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
|
||||
|
|
|
@ -26,6 +26,15 @@ define <4 x float> @twoInserts(<4 x float> %x) {
|
|||
ret <4 x float> %ins2
|
||||
}
|
||||
|
||||
define <4 x i32> @shuffleRetain(<4 x i32> %base) {
|
||||
; CHECK-LABEL: @shuffleRetain(
|
||||
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> %base, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 1>, <4 x i32> <i32 1, i32 2, i32 undef, i32 7>
|
||||
; CHECK-NEXT: ret <4 x i32> [[SHUF]]
|
||||
;
|
||||
%shuf = shufflevector <4 x i32> %base, <4 x i32> <i32 4, i32 3, i32 2, i32 1>, <4 x i32> <i32 1, i32 2, i32 undef, i32 7>
|
||||
ret <4 x i32> %shuf
|
||||
}
|
||||
|
||||
; TODO: Transform an arbitrary shuffle with constant into a shuffle that is equivalant to a vector select.
|
||||
|
||||
define <4 x float> @disguisedSelect(<4 x float> %x) {
|
||||
|
|
|
@ -236,8 +236,8 @@ define <4 x float> @test_select(float %f, float %g) {
|
|||
|
||||
define <2 x i64> @PR24922(<2 x i64> %v) {
|
||||
; CHECK-LABEL: @PR24922(
|
||||
; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <2 x i64> %v, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 2, i32 1>
|
||||
; CHECK-NEXT: ret <2 x i64> [[RESULT]]
|
||||
; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x i64> %v, i64 0, i32 0
|
||||
; CHECK-NEXT: ret <2 x i64> [[RESULT1]]
|
||||
;
|
||||
%result = select <2 x i1> <i1 icmp eq (i64 extractelement (<2 x i64> bitcast (<4 x i32> <i32 15, i32 15, i32 15, i32 15> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer
|
||||
ret <2 x i64> %result
|
||||
|
|
|
@ -7,10 +7,9 @@ define<4 x float> @foo(<4 x float> %x) {
|
|||
ret<4 x float> %ins2
|
||||
}
|
||||
|
||||
; FIXME: insertelements should fold to shuffle
|
||||
; insertelements should fold to shuffle
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1
|
||||
; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
|
||||
; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
|
||||
; CHECK-NEXT: ret <4 x float> %
|
||||
|
||||
define<4 x float> @bar(<4 x float> %x, float %a) {
|
||||
|
@ -45,12 +44,11 @@ define<4 x float> @bazz(<4 x float> %x, i32 %a) {
|
|||
ret<4 x float> %ins6
|
||||
}
|
||||
|
||||
; FIXME: insertelements should fold to shuffle
|
||||
; insertelements should fold to shuffle
|
||||
; CHECK-LABEL: @bazz
|
||||
; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 3
|
||||
; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 5.000000e+00, i32 %
|
||||
; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1
|
||||
; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
|
||||
; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
|
||||
; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 7.000000e+00, i32 %
|
||||
; CHECK-NEXT: ret <4 x float> %
|
||||
|
||||
|
@ -75,3 +73,22 @@ define<4 x float> @bazzzz(<4 x float> %x) {
|
|||
; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2
|
||||
; CHECK-NEXT: ret <4 x float> %
|
||||
|
||||
define<4 x float> @bazzzzz() {
|
||||
%ins1 = insertelement <4 x float> insertelement (<4 x float> <float 1.0, float 2.0, float 3.0, float undef>, float 4.0, i32 3), float 5.0, i32 1
|
||||
%ins2 = insertelement<4 x float> %ins1, float 10.0, i32 2
|
||||
ret<4 x float> %ins2
|
||||
}
|
||||
|
||||
; insertelements should fold to shuffle
|
||||
; CHECK-LABEL: @bazzzzz
|
||||
; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 5.000000e+00, float 1.000000e+01, float 4.000000e+00>
|
||||
|
||||
define<4 x float> @bazzzzzz(<4 x float> %x, i32 %a) {
|
||||
%ins1 = insertelement <4 x float> insertelement (<4 x float> shufflevector (<4 x float> undef, <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0> , <4 x i32> <i32 0, i32 5, i32 undef, i32 6> ), float 4.0, i32 3), float 5.0, i32 1
|
||||
ret<4 x float> %ins1
|
||||
}
|
||||
|
||||
; insertelements should fold to shuffle
|
||||
; CHECK-LABEL: @bazzzzz
|
||||
; CHECK-NEXT: ret <4 x float> <float undef, float 5.000000e+00, float undef, float 4.000000e+00>
|
||||
|
||||
|
|
|
@ -74,7 +74,7 @@ define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
|
|||
|
||||
define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
|
||||
; CHECK-LABEL: @insertps_0xc1(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %v1, float 0.000000e+00, i32 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
;
|
||||
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
|
||||
|
|
Loading…
Reference in New Issue