forked from OSchip/llvm-project
[InstCombine] insert a new shuffle in a safe place (PR25999)
Limit this transform to a basic block and guard against PHIs. Hopefully, this fixes the remaining failures in PR25999:
https://llvm.org/bugs/show_bug.cgi?id=25999
llvm-svn: 257133
This commit is contained in:
parent
506ecac085
commit
d72a458d28
|
@ -384,23 +384,20 @@ static void replaceExtractElements(InsertElementInst *InsElt,
|
|||
ConstantVector::get(ExtendMask));
|
||||
|
||||
// Insert the new shuffle after the vector operand of the extract is defined
|
||||
// or at the start of the basic block, so any subsequent extracts can use it.
|
||||
bool ReplaceAllExtUsers;
|
||||
if (auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp)) {
|
||||
// (as long as it's not a PHI) or at the start of the basic block of the
|
||||
// extract, so any subsequent extracts in the same basic block can use it.
|
||||
// TODO: Insert before the earliest ExtractElementInst that is replaced.
|
||||
auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
|
||||
if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
|
||||
WideVec->insertAfter(ExtVecOpInst);
|
||||
ReplaceAllExtUsers = true;
|
||||
} else {
|
||||
// TODO: Insert at start of function, so it's always safe to replace all?
|
||||
else
|
||||
IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
|
||||
ReplaceAllExtUsers = false;
|
||||
}
|
||||
|
||||
// Replace extracts from the original narrow vector with extracts from the new
|
||||
// wide vector.
|
||||
for (User *U : ExtVecOp->users()) {
|
||||
ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
|
||||
if (!OldExt ||
|
||||
(!ReplaceAllExtUsers && OldExt->getParent() != WideVec->getParent()))
|
||||
if (!OldExt || OldExt->getParent() != WideVec->getParent())
|
||||
continue;
|
||||
auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
|
||||
NewExt->insertAfter(WideVec);
|
||||
|
|
|
@ -125,3 +125,53 @@ end:
|
|||
ret <8 x i16> %t6
|
||||
}
|
||||
|
||||
; The widening shuffle must be inserted at a valid point (after the PHIs).
|
||||
|
||||
define <4 x double> @pr25999_phis1(i1 %c, <2 x double> %a, <4 x double> %b) {
|
||||
; CHECK-LABEL: @pr25999_phis1(
|
||||
; CHECK: %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
|
||||
; CHECK-NEXT: %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
|
||||
; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <2 x double> %tmp1, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: %tmp4 = shufflevector <4 x double> %tmp2, <4 x double> %[[WIDEVEC]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
|
||||
; CHECK-NEXT: ret <4 x double> %tmp4
|
||||
bb1:
|
||||
br i1 %c, label %bb2, label %bb3
|
||||
|
||||
bb2:
|
||||
%r = call <2 x double> @dummy(<2 x double> %a)
|
||||
br label %bb3
|
||||
|
||||
bb3:
|
||||
%tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
|
||||
%tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
|
||||
%tmp3 = extractelement <2 x double> %tmp1, i32 0
|
||||
%tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2
|
||||
ret <4 x double> %tmp4
|
||||
}
|
||||
|
||||
declare <2 x double> @dummy(<2 x double>)
|
||||
|
||||
define <4 x double> @pr25999_phis2(i1 %c, <2 x double> %a, <4 x double> %b) {
|
||||
; CHECK-LABEL: @pr25999_phis2(
|
||||
; CHECK: %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
|
||||
; CHECK-NEXT: %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
|
||||
; CHECK-NEXT: %d = fadd <2 x double> %tmp1, %tmp1
|
||||
; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <2 x double> %d, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: %tmp4 = shufflevector <4 x double> %tmp2, <4 x double> %[[WIDEVEC]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
|
||||
; CHECK-NEXT: ret <4 x double> %tmp4
|
||||
bb1:
|
||||
br i1 %c, label %bb2, label %bb3
|
||||
|
||||
bb2:
|
||||
%r = call <2 x double> @dummy(<2 x double> %a)
|
||||
br label %bb3
|
||||
|
||||
bb3:
|
||||
%tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
|
||||
%tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
|
||||
%d = fadd <2 x double> %tmp1, %tmp1
|
||||
%tmp3 = extractelement <2 x double> %d, i32 0
|
||||
%tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2
|
||||
ret <4 x double> %tmp4
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue