[PowerPC] Turn on CR-Logical reducer pass

This re-commits r375152, which was reverted in r375233 because it broke
the EXPENSIVE_CHECKS bot on Windows.

The reason for the failure was a bug in the pass that the commit turned
on by default. This patch fixes that bug and turns the pass back on.
Thanks to Simon Pilgrim, this patch has been verified on the buildbot
that originally failed.

Differential revision: https://reviews.llvm.org/D52431

llvm-svn: 375497
This commit is contained in:
Nemanja Ivanovic 2019-10-22 12:20:38 +00:00
parent 8e050e41a4
commit f2c8f3b181
7 changed files with 58 additions and 50 deletions

View File

@ -381,10 +381,10 @@ private:
const MachineBranchProbabilityInfo *MBPI; const MachineBranchProbabilityInfo *MBPI;
// A vector to contain all the CR logical operations // A vector to contain all the CR logical operations
std::vector<CRLogicalOpInfo> AllCRLogicalOps; SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps;
void initialize(MachineFunction &MFParm); void initialize(MachineFunction &MFParm);
void collectCRLogicals(); void collectCRLogicals();
bool handleCROp(CRLogicalOpInfo &CRI); bool handleCROp(unsigned Idx);
bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI); bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI);
static bool isCRLogical(MachineInstr &MI) { static bool isCRLogical(MachineInstr &MI) {
unsigned Opc = MI.getOpcode(); unsigned Opc = MI.getOpcode();
@ -398,7 +398,7 @@ private:
// Not using a range-based for loop here as the vector may grow while being // Not using a range-based for loop here as the vector may grow while being
// operated on. // operated on.
for (unsigned i = 0; i < AllCRLogicalOps.size(); i++) for (unsigned i = 0; i < AllCRLogicalOps.size(); i++)
Changed |= handleCROp(AllCRLogicalOps[i]); Changed |= handleCROp(i);
return Changed; return Changed;
} }
@ -578,10 +578,11 @@ void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) {
/// a unary CR logical might be used to change the condition code on a /// a unary CR logical might be used to change the condition code on a
/// comparison feeding it. A nullary CR logical might simply be removable /// comparison feeding it. A nullary CR logical might simply be removable
/// if the user of the bit it [un]sets can be transformed. /// if the user of the bit it [un]sets can be transformed.
bool PPCReduceCRLogicals::handleCROp(CRLogicalOpInfo &CRI) { bool PPCReduceCRLogicals::handleCROp(unsigned Idx) {
// We can definitely split a block on the inputs to a binary CR operation // We can definitely split a block on the inputs to a binary CR operation
// whose defs and (single) use are within the same block. // whose defs and (single) use are within the same block.
bool Changed = false; bool Changed = false;
CRLogicalOpInfo CRI = AllCRLogicalOps[Idx];
if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR && if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR &&
CRI.DefsSingleUse) { CRI.DefsSingleUse) {
Changed = splitBlockOnBinaryCROp(CRI); Changed = splitBlockOnBinaryCROp(CRI);

View File

@ -93,7 +93,7 @@ EnableMachineCombinerPass("ppc-machine-combiner",
static cl::opt<bool> static cl::opt<bool>
ReduceCRLogical("ppc-reduce-cr-logicals", ReduceCRLogical("ppc-reduce-cr-logicals",
cl::desc("Expand eligible cr-logical binary ops to branches"), cl::desc("Expand eligible cr-logical binary ops to branches"),
cl::init(false), cl::Hidden); cl::init(true), cl::Hidden);
extern "C" void LLVMInitializePowerPCTarget() { extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets // Register the targets
RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target()); RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());

View File

@ -36,7 +36,7 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: # %bb.1: # %bb5 ; CHECK-NEXT: # %bb.1: # %bb5
; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: li 3, 0
; CHECK-NEXT: li 4, 0 ; CHECK-NEXT: li 4, 0
; CHECK-NEXT: b .LBB0_16 ; CHECK-NEXT: b .LBB0_17
; CHECK-NEXT: .LBB0_2: # %bb1 ; CHECK-NEXT: .LBB0_2: # %bb1
; CHECK-NEXT: lfd 0, 400(1) ; CHECK-NEXT: lfd 0, 400(1)
; CHECK-NEXT: lis 3, 15856 ; CHECK-NEXT: lis 3, 15856
@ -166,13 +166,11 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: bl __gcc_qsub@PLT ; CHECK-NEXT: bl __gcc_qsub@PLT
; CHECK-NEXT: stfd 2, 176(1) ; CHECK-NEXT: stfd 2, 176(1)
; CHECK-NEXT: stfd 1, 168(1) ; CHECK-NEXT: stfd 1, 168(1)
; CHECK-NEXT: fcmpu 0, 2, 27 ; CHECK-NEXT: fcmpu 1, 2, 27
; CHECK-NEXT: lwz 3, 180(1) ; CHECK-NEXT: lwz 3, 180(1)
; CHECK-NEXT: fcmpu 1, 1, 27 ; CHECK-NEXT: fcmpu 0, 1, 27
; CHECK-NEXT: crandc 20, 6, 0 ; CHECK-NEXT: crandc 20, 2, 4
; CHECK-NEXT: cror 21, 5, 7
; CHECK-NEXT: stw 3, 268(1) ; CHECK-NEXT: stw 3, 268(1)
; CHECK-NEXT: cror 20, 21, 20
; CHECK-NEXT: lwz 3, 176(1) ; CHECK-NEXT: lwz 3, 176(1)
; CHECK-NEXT: stw 3, 264(1) ; CHECK-NEXT: stw 3, 264(1)
; CHECK-NEXT: lwz 3, 172(1) ; CHECK-NEXT: lwz 3, 172(1)
@ -181,8 +179,11 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: lwz 3, 168(1) ; CHECK-NEXT: lwz 3, 168(1)
; CHECK-NEXT: stw 3, 272(1) ; CHECK-NEXT: stw 3, 272(1)
; CHECK-NEXT: lfd 31, 272(1) ; CHECK-NEXT: lfd 31, 272(1)
; CHECK-NEXT: bc 12, 20, .LBB0_13 ; CHECK-NEXT: bc 12, 20, .LBB0_14
; CHECK-NEXT: # %bb.10: # %bb2 ; CHECK-NEXT: # %bb.10: # %bb1
; CHECK-NEXT: cror 20, 1, 3
; CHECK-NEXT: bc 12, 20, .LBB0_14
; CHECK-NEXT: # %bb.11: # %bb2
; CHECK-NEXT: fneg 28, 31 ; CHECK-NEXT: fneg 28, 31
; CHECK-NEXT: stfd 28, 48(1) ; CHECK-NEXT: stfd 28, 48(1)
; CHECK-NEXT: lis 3, 16864 ; CHECK-NEXT: lis 3, 16864
@ -231,15 +232,15 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: crandc 20, 6, 1 ; CHECK-NEXT: crandc 20, 6, 1
; CHECK-NEXT: cror 20, 4, 20 ; CHECK-NEXT: cror 20, 4, 20
; CHECK-NEXT: addis 3, 3, -32768 ; CHECK-NEXT: addis 3, 3, -32768
; CHECK-NEXT: bc 12, 20, .LBB0_12 ; CHECK-NEXT: bc 12, 20, .LBB0_13
; CHECK-NEXT: # %bb.11: # %bb2 ; CHECK-NEXT: # %bb.12: # %bb2
; CHECK-NEXT: ori 3, 4, 0 ; CHECK-NEXT: ori 3, 4, 0
; CHECK-NEXT: b .LBB0_12 ; CHECK-NEXT: b .LBB0_13
; CHECK-NEXT: .LBB0_12: # %bb2 ; CHECK-NEXT: .LBB0_13: # %bb2
; CHECK-NEXT: subfic 4, 3, 0 ; CHECK-NEXT: subfic 4, 3, 0
; CHECK-NEXT: subfe 3, 29, 30 ; CHECK-NEXT: subfe 3, 29, 30
; CHECK-NEXT: b .LBB0_16 ; CHECK-NEXT: b .LBB0_17
; CHECK-NEXT: .LBB0_13: # %bb3 ; CHECK-NEXT: .LBB0_14: # %bb3
; CHECK-NEXT: stfd 31, 112(1) ; CHECK-NEXT: stfd 31, 112(1)
; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: li 3, 0
; CHECK-NEXT: stw 3, 148(1) ; CHECK-NEXT: stw 3, 148(1)
@ -286,13 +287,13 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: crandc 20, 6, 0 ; CHECK-NEXT: crandc 20, 6, 0
; CHECK-NEXT: cror 20, 5, 20 ; CHECK-NEXT: cror 20, 5, 20
; CHECK-NEXT: addis 3, 3, -32768 ; CHECK-NEXT: addis 3, 3, -32768
; CHECK-NEXT: bc 12, 20, .LBB0_14 ; CHECK-NEXT: bc 12, 20, .LBB0_15
; CHECK-NEXT: b .LBB0_15 ; CHECK-NEXT: b .LBB0_16
; CHECK-NEXT: .LBB0_14: # %bb3
; CHECK-NEXT: addi 4, 3, 0
; CHECK-NEXT: .LBB0_15: # %bb3 ; CHECK-NEXT: .LBB0_15: # %bb3
; CHECK-NEXT: addi 4, 3, 0
; CHECK-NEXT: .LBB0_16: # %bb3
; CHECK-NEXT: mr 3, 30 ; CHECK-NEXT: mr 3, 30
; CHECK-NEXT: .LBB0_16: # %bb5 ; CHECK-NEXT: .LBB0_17: # %bb5
; CHECK-NEXT: lfd 31, 456(1) # 8-byte Folded Reload ; CHECK-NEXT: lfd 31, 456(1) # 8-byte Folded Reload
; CHECK-NEXT: lfd 30, 448(1) # 8-byte Folded Reload ; CHECK-NEXT: lfd 30, 448(1) # 8-byte Folded Reload
; CHECK-NEXT: lfd 29, 440(1) # 8-byte Folded Reload ; CHECK-NEXT: lfd 29, 440(1) # 8-byte Folded Reload

View File

@ -1,5 +1,7 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s ; RUN: -ppc-reduce-cr-logicals=false < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-reduce-cr-logicals=false < %s | FileCheck %s
define signext i32 @testi32slt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { define signext i32 @testi32slt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 {
; CHECK-LABEL: testi32slt ; CHECK-LABEL: testi32slt

View File

@ -14,13 +14,15 @@ define void @f(i8*, i8*, i64*) {
; CHECK-NEXT: li 4, 0 ; CHECK-NEXT: li 4, 0
; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_2: # ; CHECK-NEXT: .LBB0_2: #
; CHECK-NEXT: cmplwi 4, 14
; CHECK-NEXT: cmpd 1, 3, 4
; CHECK-NEXT: sldi 6, 6, 4 ; CHECK-NEXT: sldi 6, 6, 4
; CHECK-NEXT: cror 20, 6, 1 ; CHECK-NEXT: cmplwi 4, 14
; CHECK-NEXT: addi 4, 4, 1 ; CHECK-NEXT: addi 7, 4, 1
; CHECK-NEXT: bc 4, 20, .LBB0_2 ; CHECK-NEXT: bc 12, 1, .LBB0_4
; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: # %bb.3: #
; CHECK-NEXT: cmpd 3, 4
; CHECK-NEXT: mr 4, 7
; CHECK-NEXT: bc 4, 2, .LBB0_2
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: std 6, 8(5) ; CHECK-NEXT: std 6, 8(5)
; CHECK-NEXT: blr ; CHECK-NEXT: blr

View File

@ -16,16 +16,17 @@ define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 sig
; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -64(r1) ; CHECK-NEXT: stdu r1, -64(r1)
; CHECK-NEXT: mr r29, r5 ; CHECK-NEXT: mr r29, r5
; CHECK-NEXT: cmpwi cr1, r4, 11
; CHECK-NEXT: mr r30, r3 ; CHECK-NEXT: mr r30, r3
; CHECK-NEXT: extsw r28, r4 ; CHECK-NEXT: extsw r28, r4
; CHECK-NEXT: std r2, 24(r1) ; CHECK-NEXT: std r2, 24(r1)
; CHECK-NEXT: cmpwi r29, 1 ; CHECK-NEXT: cmpwi r29, 1
; CHECK-NEXT: cror 4*cr5+lt, lt, 4*cr1+lt ; CHECK-NEXT: bc 12, lt, .LBB0_3
; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: cmpwi cr0, r4, 11
; CHECK-NEXT: bc 12, lt, .LBB0_3
; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_1: # %for.body.us ; CHECK-NEXT: .LBB0_2: # %for.body.us
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: #
; CHECK-NEXT: mtctr r30 ; CHECK-NEXT: mtctr r30
; CHECK-NEXT: mr r3, r28 ; CHECK-NEXT: mr r3, r28
; CHECK-NEXT: mr r12, r30 ; CHECK-NEXT: mr r12, r30
@ -33,8 +34,8 @@ define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 sig
; CHECK-NEXT: ld 2, 24(r1) ; CHECK-NEXT: ld 2, 24(r1)
; CHECK-NEXT: addi r29, r29, -1 ; CHECK-NEXT: addi r29, r29, -1
; CHECK-NEXT: cmplwi r29, 0 ; CHECK-NEXT: cmplwi r29, 0
; CHECK-NEXT: bne cr0, .LBB0_1 ; CHECK-NEXT: bne cr0, .LBB0_2
; CHECK-NEXT: .LBB0_2: # %for.cond.cleanup ; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup
; CHECK-NEXT: mtctr r30 ; CHECK-NEXT: mtctr r30
; CHECK-NEXT: mr r3, r28 ; CHECK-NEXT: mr r3, r28
; CHECK-NEXT: mr r12, r30 ; CHECK-NEXT: mr r12, r30

View File

@ -240,22 +240,23 @@ entry:
define i128 @invalidv1i128(<2 x i128> %v1, <2 x i128> %v2) { define i128 @invalidv1i128(<2 x i128> %v1, <2 x i128> %v2) {
; CHECK-LABEL: invalidv1i128: ; CHECK-LABEL: invalidv1i128:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: mfvsrd 3, 36
; CHECK-NEXT: xxswapd 0, 36 ; CHECK-NEXT: xxswapd 0, 36
; CHECK-NEXT: mfvsrd 4, 36 ; CHECK-NEXT: mfvsrd 4, 34
; CHECK-NEXT: mfvsrd 5, 34 ; CHECK-NEXT: xxswapd 1, 34
; CHECK-NEXT: cmpld 4, 3
; CHECK-NEXT: cmpd 1, 4, 3
; CHECK-NEXT: mfvsrd 3, 0 ; CHECK-NEXT: mfvsrd 3, 0
; CHECK-NEXT: xxswapd 0, 34
; CHECK-NEXT: cmpld 5, 4
; CHECK-NEXT: cmpd 1, 5, 4
; CHECK-NEXT: crandc 20, 4, 2 ; CHECK-NEXT: crandc 20, 4, 2
; CHECK-NEXT: mfvsrd 6, 0 ; CHECK-NEXT: mfvsrd 4, 1
; CHECK-NEXT: cmpld 1, 6, 3 ; CHECK-NEXT: cmpld 1, 4, 3
; CHECK-NEXT: crand 21, 2, 4 ; CHECK-NEXT: bc 12, 20, .LBB12_3
; CHECK-NEXT: cror 20, 21, 20
; CHECK-NEXT: bc 12, 20, .LBB12_2
; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: crand 20, 2, 4
; CHECK-NEXT: bc 12, 20, .LBB12_3
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: vmr 2, 4 ; CHECK-NEXT: vmr 2, 4
; CHECK-NEXT: .LBB12_2: ; CHECK-NEXT: .LBB12_3:
; CHECK-NEXT: xxswapd 0, 34 ; CHECK-NEXT: xxswapd 0, 34
; CHECK-NEXT: mfvsrd 4, 34 ; CHECK-NEXT: mfvsrd 4, 34
; CHECK-NEXT: mfvsrd 3, 0 ; CHECK-NEXT: mfvsrd 3, 0