[InstCombine] allow lengthening of insertelement to eliminate shuffles

As noted in post-commit comments for D52548, the limitation on 
increasing vector length can be applied by opcode.
As a first step, this patch only allows insertelement to be
widened because that has no logical downsides for IR and has 
little risk of pessimizing codegen.

This may cause PR39132 to go into hiding during a full compile,
but that bug is not fixed.

llvm-svn: 343406
This commit is contained in:
Sanjay Patel 2018-09-30 13:50:42 +00:00
parent 818cfc40ff
commit 26c119a9c2
2 changed files with 12 additions and 7 deletions

View File

@ -918,6 +918,13 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::GetElementPtr: {
// Bail out if we would create longer vector ops. We could allow creating
// longer vector ops, but that may result in more expensive codegen. We
// would also need to limit the transform to avoid undefined behavior for
// integer div/rem.
Type *ITy = I->getType();
if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements())
return false;
for (Value *Operand : I->operands()) {
if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
return false;
@ -1464,8 +1471,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isRHSID) return replaceInstUsesWith(SVI, RHS);
}
if (isa<UndefValue>(RHS) && !SVI.increasesLength() &&
canEvaluateShuffled(LHS, Mask)) {
if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
Value *V = evaluateInDifferentElementOrder(LHS, Mask);
return replaceInstUsesWith(SVI, V);
}

View File

@ -218,14 +218,13 @@ define <3 x i32> @div_wider(i32 %y, i32 %z) {
ret <3 x i32> %ext
}
; TODO: Increasing length of insertelements (no math ops) is a good canonicalization.
; Increasing length of insertelements (no math ops) is a good canonicalization.
define <3 x i8> @fold_inselts_with_widening_shuffle(i8 %x, i8 %y) {
; CHECK-LABEL: @fold_inselts_with_widening_shuffle(
; CHECK-NEXT: [[INS0:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x i8> [[INS0]], i8 [[Y:%.*]], i32 1
; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x i8> [[INS1]], <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: ret <3 x i8> [[WIDEN]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i8> undef, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x i8> [[TMP1]], i8 [[Y:%.*]], i32 1
; CHECK-NEXT: ret <3 x i8> [[TMP2]]
;
%ins0 = insertelement <2 x i8> undef, i8 %x, i32 0
%ins1 = insertelement <2 x i8> %ins0, i8 %y, i32 1