forked from OSchip/llvm-project
Revert "[AggressiveInstCombine] Generalize foldGuardedRotateToFunnelShift to generic funnel shifts"
This reverts commit 59b22e495c
.
That commit broke building for ARM and AArch64, reproducible like this:
$ cat apedec-reduced.c
a;
b(e) {
int c;
unsigned d = f();
c = d >> 32 - e;
return c;
}
g() {
int h = i();
if (a)
h = h << a | b(a);
return h;
}
$ clang -target aarch64-linux-gnu -w -c -O3 apedec-reduced.c
clang: ../lib/Transforms/InstCombine/InstructionCombining.cpp:3656: bool llvm::InstCombinerImpl::run(): Assertion `DT.dominates(BB, UserParent) && "Dominance relation broken?"' failed.
Same thing for e.g. an armv7-linux-gnueabihf target.
This commit is contained in:
parent
065047ab01
commit
36cf1e7d0e
|
@ -39,8 +39,6 @@ using namespace PatternMatch;
|
|||
STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
|
||||
STATISTIC(NumGuardedRotates,
|
||||
"Number of guarded rotates transformed into funnel shifts");
|
||||
STATISTIC(NumGuardedFunnelShifts,
|
||||
"Number of guarded funnel shifts transformed into funnel shifts");
|
||||
STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
|
||||
|
||||
namespace {
|
||||
|
@ -69,17 +67,17 @@ public:
|
|||
};
|
||||
} // namespace
|
||||
|
||||
/// Match a pattern for a bitwise funnel/rotate operation that partially guards
|
||||
/// against undefined behavior by branching around the funnel-shift/rotation
|
||||
/// when the shift amount is 0.
|
||||
static bool foldGuardedFunnelShift(Instruction &I) {
|
||||
/// Match a pattern for a bitwise rotate operation that partially guards
|
||||
/// against undefined behavior by branching around the rotation when the shift
|
||||
/// amount is 0.
|
||||
static bool foldGuardedRotateToFunnelShift(Instruction &I) {
|
||||
if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
|
||||
return false;
|
||||
|
||||
// As with the one-use checks below, this is not strictly necessary, but we
|
||||
// are being cautious to avoid potential perf regressions on targets that
|
||||
// do not actually have a funnel/rotate instruction (where the funnel shift
|
||||
// would be expanded back into math/shift/logic ops).
|
||||
// do not actually have a rotate instruction (where the funnel shift would be
|
||||
// expanded back into math/shift/logic ops).
|
||||
if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
|
||||
return false;
|
||||
|
||||
|
@ -113,33 +111,27 @@ static bool foldGuardedFunnelShift(Instruction &I) {
|
|||
return Intrinsic::not_intrinsic;
|
||||
};
|
||||
|
||||
// One phi operand must be a funnel/rotate operation, and the other phi
|
||||
// operand must be the source value of that funnel/rotate operation:
|
||||
// One phi operand must be a rotate operation, and the other phi operand must
|
||||
// be the source value of that rotate operation:
|
||||
// phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
|
||||
// phi [ fshl(ShlVal0, ShlVal1, ShAmt), FunnelBB ], [ ShlVal0, GuardBB ]
|
||||
// phi [ fshr(ShlVal0, ShlVal1, ShAmt), FunnelBB ], [ ShlVal1, GuardBB ]
|
||||
PHINode &Phi = cast<PHINode>(I);
|
||||
unsigned FunnelOp = 0, GuardOp = 1;
|
||||
Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
|
||||
Value *ShVal0, *ShVal1, *ShAmt;
|
||||
Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
|
||||
if (IID == Intrinsic::not_intrinsic ||
|
||||
(IID == Intrinsic::fshl && ShVal0 != P1) ||
|
||||
(IID == Intrinsic::fshr && ShVal1 != P1)) {
|
||||
if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P1) {
|
||||
IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
|
||||
if (IID == Intrinsic::not_intrinsic ||
|
||||
(IID == Intrinsic::fshl && ShVal0 != P0) ||
|
||||
(IID == Intrinsic::fshr && ShVal1 != P0))
|
||||
if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P0)
|
||||
return false;
|
||||
assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
|
||||
"Pattern must match funnel shift left or right");
|
||||
std::swap(FunnelOp, GuardOp);
|
||||
}
|
||||
assert(ShVal0 == ShVal1 && "Rotation funnel shift pattern expected");
|
||||
|
||||
// The incoming block with our source operand must be the "guard" block.
|
||||
// That must contain a cmp+branch to avoid the funnel/rotate when the shift
|
||||
// amount is equal to 0. The other incoming block is the block with the
|
||||
// funnel/rotate.
|
||||
// That must contain a cmp+branch to avoid the rotate when the shift amount
|
||||
// is equal to 0. The other incoming block is the block with the rotate.
|
||||
BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
|
||||
BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
|
||||
Instruction *TermI = GuardBB->getTerminator();
|
||||
|
@ -158,21 +150,18 @@ static bool foldGuardedFunnelShift(Instruction &I) {
|
|||
// br i1 %cmp, label %PhiBB, label %FunnelBB
|
||||
// FunnelBB:
|
||||
// %sub = sub i32 32, %ShAmt
|
||||
// %shr = lshr i32 %ShVal1, %sub
|
||||
// %shl = shl i32 %ShVal0, %ShAmt
|
||||
// %fsh = or i32 %shr, %shl
|
||||
// %shr = lshr i32 %RotSrc, %sub
|
||||
// %shl = shl i32 %RotSrc, %ShAmt
|
||||
// %rot = or i32 %shr, %shl
|
||||
// br label %PhiBB
|
||||
// PhiBB:
|
||||
// %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
|
||||
// %cond = phi i32 [ %RotSrc, %FunnelBB ], [ %RotSrc, %GuardBB ]
|
||||
// -->
|
||||
// llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
|
||||
// llvm.fshl.i32(i32 %RotSrc, i32 %RotSrc, i32 %ShAmt)
|
||||
IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
|
||||
Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
|
||||
Phi.replaceAllUsesWith(Builder.CreateCall(F, {ShVal0, ShVal1, ShAmt}));
|
||||
if (ShVal0 == ShVal1)
|
||||
++NumGuardedRotates;
|
||||
else
|
||||
++NumGuardedFunnelShifts;
|
||||
++NumGuardedRotates;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -361,7 +350,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
|
|||
// iteratively in this loop rather than waiting until the end.
|
||||
for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
|
||||
MadeChange |= foldAnyOrAllBitsSet(I);
|
||||
MadeChange |= foldGuardedFunnelShift(I);
|
||||
MadeChange |= foldGuardedRotateToFunnelShift(I);
|
||||
MadeChange |= tryToRecognizePopCount(I);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,10 +7,14 @@ define i32 @fshl(i32 %a, i32 %b, i32 %c) {
|
|||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
|
||||
; CHECK: fshbb:
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
|
||||
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
|
||||
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
|
||||
; CHECK-NEXT: ret i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
|
@ -34,10 +38,14 @@ define i32 @fshl_commute_phi(i32 %a, i32 %b, i32 %c) {
|
|||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
|
||||
; CHECK: fshbb:
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
|
||||
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
|
||||
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
|
||||
; CHECK-NEXT: ret i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
|
@ -61,10 +69,14 @@ define i32 @fshl_commute_or(i32 %a, i32 %b, i32 %c) {
|
|||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
|
||||
; CHECK: fshbb:
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
|
||||
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
|
||||
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
|
||||
; CHECK-NEXT: ret i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
|
@ -90,11 +102,15 @@ define i32 @fshl_insert_valid_location(i32 %a, i32 %b, i32 %c) {
|
|||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
|
||||
; CHECK: fshbb:
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
|
||||
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
|
||||
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
|
||||
; CHECK-NEXT: [[RES:%.*]] = or i32 [[TMP0]], [[OTHER]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[RES:%.*]] = or i32 [[COND]], [[OTHER]]
|
||||
; CHECK-NEXT: ret i32 [[RES]]
|
||||
;
|
||||
entry:
|
||||
|
@ -121,10 +137,14 @@ define i32 @fshr(i32 %a, i32 %b, i32 %c) {
|
|||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
|
||||
; CHECK: fshbb:
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
|
||||
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
|
||||
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
|
||||
; CHECK-NEXT: ret i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[B]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
|
@ -148,10 +168,14 @@ define i32 @fshr_commute_phi(i32 %a, i32 %b, i32 %c) {
|
|||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
|
||||
; CHECK: fshbb:
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
|
||||
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
|
||||
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
|
||||
; CHECK-NEXT: ret i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
|
@ -175,10 +199,14 @@ define i32 @fshr_commute_or(i32 %a, i32 %b, i32 %c) {
|
|||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
|
||||
; CHECK: fshbb:
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
|
||||
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
|
||||
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
|
||||
; CHECK-NEXT: ret i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
|
@ -368,7 +396,7 @@ end:
|
|||
ret i32 %cond
|
||||
}
|
||||
|
||||
; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift).
|
||||
; Negative test - wrong shift.
|
||||
|
||||
define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: @not_fshr_5(
|
||||
|
@ -376,10 +404,14 @@ define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) {
|
|||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
|
||||
; CHECK: fshbb:
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
|
||||
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[C]], [[SUB]]
|
||||
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[C]], i32 [[B:%.*]], i32 [[C]])
|
||||
; CHECK-NEXT: ret i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
|
|
|
@ -370,7 +370,7 @@ end:
|
|||
ret i32 %cond
|
||||
}
|
||||
|
||||
; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift).
|
||||
; Negative test - wrong shift.
|
||||
|
||||
define i32 @not_rotr_5(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @not_rotr_5(
|
||||
|
@ -378,10 +378,14 @@ define i32 @not_rotr_5(i32 %a, i32 %b) {
|
|||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
|
||||
; CHECK: rotbb:
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
|
||||
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[B]], [[SUB]]
|
||||
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[B]]
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[B]], i32 [[A:%.*]], i32 [[B]])
|
||||
; CHECK-NEXT: ret i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
|
||||
; CHECK-NEXT: ret i32 [[COND]]
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp eq i32 %b, 0
|
||||
|
|
Loading…
Reference in New Issue