[AggressiveInstCombine] Generalize foldGuardedRotateToFunnelShift to generic funnel shifts

The fold currently only handles rotation patterns, but with the maturation of backend funnel shift handling we can now realistically handle all funnel shift patterns.

This should allow us to begin resolving PR46896 et al.

Differential Revision: https://reviews.llvm.org/D90625
This commit is contained in:
Simon Pilgrim 2020-11-03 10:49:33 +00:00
parent 9925168576
commit 59b22e495c
3 changed files with 52 additions and 77 deletions

View File

@ -39,6 +39,8 @@ using namespace PatternMatch;
STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
STATISTIC(NumGuardedRotates,
"Number of guarded rotates transformed into funnel shifts");
STATISTIC(NumGuardedFunnelShifts,
"Number of guarded funnel shifts transformed into funnel shifts");
STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
namespace {
@ -67,17 +69,17 @@ public:
};
} // namespace
/// Match a pattern for a bitwise rotate operation that partially guards
/// against undefined behavior by branching around the rotation when the shift
/// amount is 0.
static bool foldGuardedRotateToFunnelShift(Instruction &I) {
/// Match a pattern for a bitwise funnel/rotate operation that partially guards
/// against undefined behavior by branching around the funnel-shift/rotation
/// when the shift amount is 0.
static bool foldGuardedFunnelShift(Instruction &I) {
if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
return false;
// As with the one-use checks below, this is not strictly necessary, but we
// are being cautious to avoid potential perf regressions on targets that
// do not actually have a rotate instruction (where the funnel shift would be
// expanded back into math/shift/logic ops).
// do not actually have a funnel/rotate instruction (where the funnel shift
// would be expanded back into math/shift/logic ops).
if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
return false;
@ -111,27 +113,33 @@ static bool foldGuardedRotateToFunnelShift(Instruction &I) {
return Intrinsic::not_intrinsic;
};
// One phi operand must be a rotate operation, and the other phi operand must
// be the source value of that rotate operation:
// One phi operand must be a funnel/rotate operation, and the other phi
// operand must be the source value of that funnel/rotate operation:
// phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
// phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
// phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
PHINode &Phi = cast<PHINode>(I);
unsigned FunnelOp = 0, GuardOp = 1;
Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
Value *ShVal0, *ShVal1, *ShAmt;
Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P1) {
if (IID == Intrinsic::not_intrinsic ||
(IID == Intrinsic::fshl && ShVal0 != P1) ||
(IID == Intrinsic::fshr && ShVal1 != P1)) {
IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P0)
if (IID == Intrinsic::not_intrinsic ||
(IID == Intrinsic::fshl && ShVal0 != P0) ||
(IID == Intrinsic::fshr && ShVal1 != P0))
return false;
assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
"Pattern must match funnel shift left or right");
std::swap(FunnelOp, GuardOp);
}
assert(ShVal0 == ShVal1 && "Rotation funnel shift pattern expected");
// The incoming block with our source operand must be the "guard" block.
// That must contain a cmp+branch to avoid the rotate when the shift amount
// is equal to 0. The other incoming block is the block with the rotate.
// That must contain a cmp+branch to avoid the funnel/rotate when the shift
// amount is equal to 0. The other incoming block is the block with the
// funnel/rotate.
BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
Instruction *TermI = GuardBB->getTerminator();
@ -150,18 +158,21 @@ static bool foldGuardedRotateToFunnelShift(Instruction &I) {
// br i1 %cmp, label %PhiBB, label %FunnelBB
// FunnelBB:
// %sub = sub i32 32, %ShAmt
// %shr = lshr i32 %RotSrc, %sub
// %shl = shl i32 %RotSrc, %ShAmt
// %rot = or i32 %shr, %shl
// %shr = lshr i32 %ShVal1, %sub
// %shl = shl i32 %ShVal0, %ShAmt
// %fsh = or i32 %shr, %shl
// br label %PhiBB
// PhiBB:
// %cond = phi i32 [ %RotSrc, %FunnelBB ], [ %RotSrc, %GuardBB ]
// %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
// -->
// llvm.fshl.i32(i32 %RotSrc, i32 %RotSrc, i32 %ShAmt)
// llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
Phi.replaceAllUsesWith(Builder.CreateCall(F, {ShVal0, ShVal1, ShAmt}));
if (ShVal0 == ShVal1)
++NumGuardedRotates;
else
++NumGuardedFunnelShifts;
return true;
}
@ -350,7 +361,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
// iteratively in this loop rather than waiting until the end.
for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
MadeChange |= foldAnyOrAllBitsSet(I);
MadeChange |= foldGuardedRotateToFunnelShift(I);
MadeChange |= foldGuardedFunnelShift(I);
MadeChange |= tryToRecognizePopCount(I);
}
}

View File

@ -7,14 +7,10 @@ define i32 @fshl(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[COND]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0
@ -38,14 +34,10 @@ define i32 @fshl_commute_phi(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
; CHECK-NEXT: ret i32 [[COND]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0
@ -69,14 +61,10 @@ define i32 @fshl_commute_or(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
; CHECK-NEXT: ret i32 [[COND]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0
@ -102,15 +90,11 @@ define i32 @fshl_insert_valid_location(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY]] ]
; CHECK-NEXT: [[RES:%.*]] = or i32 [[COND]], [[OTHER]]
; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
; CHECK-NEXT: [[RES:%.*]] = or i32 [[TMP0]], [[OTHER]]
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
@ -137,14 +121,10 @@ define i32 @fshr(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[B]], [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[COND]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0
@ -168,14 +148,10 @@ define i32 @fshr_commute_phi(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
; CHECK-NEXT: ret i32 [[COND]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0
@ -199,14 +175,10 @@ define i32 @fshr_commute_or(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
; CHECK-NEXT: ret i32 [[COND]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0
@ -396,7 +368,7 @@ end:
ret i32 %cond
}
; Negative test - wrong shift.
; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift).
define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @not_fshr_5(
@ -404,14 +376,10 @@ define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[C]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
; CHECK-NEXT: ret i32 [[COND]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[C]], i32 [[B:%.*]], i32 [[C]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0

View File

@ -370,7 +370,7 @@ end:
ret i32 %cond
}
; Negative test - wrong shift.
; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift).
define i32 @not_rotr_5(i32 %a, i32 %b) {
; CHECK-LABEL: @not_rotr_5(
@ -378,14 +378,10 @@ define i32 @not_rotr_5(i32 %a, i32 %b) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[B]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
; CHECK-NEXT: ret i32 [[COND]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[B]], i32 [[A:%.*]], i32 [[B]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0