From 683994c863b8b8a46372408773e4bd65502c5193 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Fri, 24 Sep 2021 11:43:43 -0700 Subject: [PATCH] X86InstrInfo: Refactor and cleanup optimizeCompareInstr This changes the first part of `optimizeCompareInstr` being split into a loop with a forward scan for cases that re-use zero flags from a producer in case of compare with zero and a backward scan for finding an instruction equivalent to a compare. The code now uses a single backward scan searching for the next instructions that reads or writes EFLAGS. Also: - Add comments giving examples for the 3 cases handled. - Check `MI` which contains the result of the zero-compare cases, instead of re-checking `IsCmpZero`. - Tweak coding style in some loops. - Add new MIR based tests that test the optimization in isolation. This also removes a check for flag readers in situations like this: ``` = SUB32rr %0, %1, implicit-def $eflags ... we no longer stop when there are $eflag users here CMP32rr %0, %1 ; will be removed ... ``` Differential Revision: https://reviews.llvm.org/D110857 --- llvm/lib/Target/X86/X86InstrInfo.cpp | 145 ++++++++++----------- llvm/test/CodeGen/X86/optimize-compare.mir | 103 +++++++++++++++ 2 files changed, 170 insertions(+), 78 deletions(-) create mode 100644 llvm/test/CodeGen/X86/optimize-compare.mir diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 3526955f2814..cc2d425eadd9 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4275,93 +4275,84 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, } } - // Get the unique definition of SrcReg. - MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); - if (!MI) return false; + // The following code tries to remove the comparison by re-using EFLAGS + // from earlier instructions. - // CmpInstr is the first instruction of the BB. - MachineBasicBlock::iterator I = CmpInstr, Def = MI; - - // If we are comparing against zero, check whether we can use MI to update - // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize. bool IsCmpZero = (CmpMask != 0 && CmpValue == 0); - if (IsCmpZero && MI->getParent() != CmpInstr.getParent()) - return false; - // If we have a use of the source register between the def and our compare - // instruction we can eliminate the compare iff the use sets EFLAGS in the - // right way. - bool ShouldUpdateCC = false; + MachineInstr *MI = nullptr; + MachineInstr *Sub = nullptr; + MachineInstr *Movr0Inst = nullptr; bool NoSignFlag = false; bool ClearsOverflowFlag = false; + bool ShouldUpdateCC = false; X86::CondCode NewCC = X86::COND_INVALID; - if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag, ClearsOverflowFlag)) { - // Scan forward from the use until we hit the use we're looking for or the - // compare instruction. - for (MachineBasicBlock::iterator J = MI;; ++J) { - // Do we have a convertible instruction? - NewCC = isUseDefConvertible(*J); - if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() && - J->getOperand(1).getReg() == SrcReg) { - assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!"); - ShouldUpdateCC = true; // Update CC later on. - // This is not a def of SrcReg, but still a def of EFLAGS. Keep going - // with the new def. - Def = J; - MI = &*Def; + + // Search backward from CmpInstr for the next instruction defining EFLAGS. + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineBasicBlock &CmpMBB = *CmpInstr.getParent(); + MachineBasicBlock::reverse_iterator From = + std::next(MachineBasicBlock::reverse_iterator(CmpInstr)); + MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg); + assert(SrcRegDef && "Must have a definition (SSA)"); + for (MachineInstr &Inst : make_range(From, CmpMBB.rend())) { + // Try to use EFLAGS from the instruction defining %SrcReg. Example: + // %eax = addl ... + // ... // EFLAGS not changed + // testl %eax, %eax // <-- can be removed + if (&Inst == SrcRegDef) { + if (IsCmpZero && isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) { + MI = &Inst; + break; + } + // Cannot find other candidates before definition of SrcReg. + return false; + } + + if (Inst.modifiesRegister(X86::EFLAGS, TRI)) { + // Try to use EFLAGS produced by an instruction reading %SrcReg. Example: + // %eax = ... + // ... + // popcntl %eax + // ... // EFLAGS not changed + // testl %eax, %eax // <-- can be removed + if (IsCmpZero) { + NewCC = isUseDefConvertible(Inst); + if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() && + Inst.getOperand(1).getReg() == SrcReg) { + ShouldUpdateCC = true; + MI = &Inst; + break; + } + } + + // Try to use EFLAGS from an instruction with similar flag results. + // Example: + // sub x, y or cmp x, y + // ... // EFLAGS not changed + // cmp x, y // <-- can be removed + if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, + CmpValue, Inst)) { + Sub = &Inst; break; } - if (J == I) - return false; - } - } - - // We are searching for an earlier instruction that can make CmpInstr - // redundant and that instruction will be saved in Sub. - MachineInstr *Sub = nullptr; - const TargetRegisterInfo *TRI = &getRegisterInfo(); - - // We iterate backward, starting from the instruction before CmpInstr and - // stop when reaching the definition of a source register or done with the BB. - // RI points to the instruction before CmpInstr. - // If the definition is in this basic block, RE points to the definition; - // otherwise, RE is the rend of the basic block. - MachineBasicBlock::reverse_iterator - RI = ++I.getReverse(), - RE = CmpInstr.getParent() == MI->getParent() - ? Def.getReverse() /* points to MI */ - : CmpInstr.getParent()->rend(); - MachineInstr *Movr0Inst = nullptr; - for (; RI != RE; ++RI) { - MachineInstr &Instr = *RI; - // Check whether CmpInstr can be made redundant by the current instruction. - if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, - CmpValue, Instr)) { - Sub = &Instr; - break; - } - - if (Instr.modifiesRegister(X86::EFLAGS, TRI) || - Instr.readsRegister(X86::EFLAGS, TRI)) { - // This instruction modifies or uses EFLAGS. - - // MOV32r0 etc. are implemented with xor which clobbers condition code. - // They are safe to move up, if the definition to EFLAGS is dead and - // earlier instructions do not read or write EFLAGS. - if (!Movr0Inst && Instr.getOpcode() == X86::MOV32r0 && - Instr.registerDefIsDead(X86::EFLAGS, TRI)) { - Movr0Inst = &Instr; + // MOV32r0 is implemented with xor which clobbers condition code. It is + // safe to move up, if the definition to EFLAGS is dead and earlier + // instructions do not read or write EFLAGS. + if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 && + Inst.registerDefIsDead(X86::EFLAGS, TRI)) { + Movr0Inst = &Inst; continue; } - // We can't remove CmpInstr. + // Cannot do anything for any other EFLAG changes. return false; } } // Return false if no candidates exist. - if (!IsCmpZero && !Sub) + if (!MI && !Sub) return false; bool IsSwapped = @@ -4374,9 +4365,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, // live-out. bool IsSafe = false; SmallVector, 4> OpsToUpdate; - MachineBasicBlock::iterator E = CmpInstr.getParent()->end(); - for (++I; I != E; ++I) { - const MachineInstr &Instr = *I; + MachineBasicBlock::iterator AfterCmpInstr = + std::next(MachineBasicBlock::iterator(CmpInstr)); + for (MachineInstr &Instr : make_range(AfterCmpInstr, CmpMBB.end())) { bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI); bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI); // We should check the usage if this instruction uses and updates EFLAGS. @@ -4449,7 +4440,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, // Push the MachineInstr to OpsToUpdate. // If it is safe to remove CmpInstr, the condition code of these // instructions will be modified. - OpsToUpdate.push_back(std::make_pair(&*I, ReplacementCC)); + OpsToUpdate.push_back(std::make_pair(&Instr, ReplacementCC)); } if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) { // It is safe to remove CmpInstr if EFLAGS is updated again or killed. @@ -4461,8 +4452,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, // If EFLAGS is not killed nor re-defined, we should check whether it is // live-out. If it is live-out, do not optimize. if ((IsCmpZero || IsSwapped) && !IsSafe) { - MachineBasicBlock *MBB = CmpInstr.getParent(); - for (MachineBasicBlock *Successor : MBB->successors()) + for (MachineBasicBlock *Successor : CmpMBB.successors()) if (Successor->isLiveIn(X86::EFLAGS)) return false; } @@ -4472,8 +4462,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, // Move Movr0Inst to the appropriate place before Sub. if (Movr0Inst) { // Look backwards until we find a def that doesn't use the current EFLAGS. - Def = Sub; - MachineBasicBlock::reverse_iterator InsertI = Def.getReverse(), + MachineBasicBlock::reverse_iterator InsertI = Sub, InsertE = Sub->getParent()->rend(); for (; InsertI != InsertE; ++InsertI) { MachineInstr *Instr = &*InsertI; diff --git a/llvm/test/CodeGen/X86/optimize-compare.mir b/llvm/test/CodeGen/X86/optimize-compare.mir new file mode 100644 index 000000000000..8a9c6504698b --- /dev/null +++ b/llvm/test/CodeGen/X86/optimize-compare.mir @@ -0,0 +1,103 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -mtriple=x86_64-- -run-pass peephole-opt | FileCheck %s + +--- +name: opt_zerocmp_0 +body: | + bb.0: + ; CHECK-LABEL: name: opt_zerocmp_0 + ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: [[XOR32ri:%[0-9]+]]:gr32 = XOR32ri [[COPY]], i32 1234, implicit-def $eflags + ; CHECK-NEXT: $al = SETCCr 5, implicit $eflags + %0:gr32 = COPY $esi + %1:gr32 = XOR32ri %0, i32 1234, implicit-def $eflags + ; TEST should be removed. + TEST32rr %1, %1, implicit-def $eflags + $al = SETCCr 5, implicit $eflags +... +--- +name: opt_zerocmp_1 +body: | + bb.0: + ; CHECK-LABEL: name: opt_zerocmp_1 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: [[DEC64r:%[0-9]+]]:gr64 = DEC64r [[COPY]], implicit-def $eflags + ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[DEC64r]], 5, $noreg, 12, $noreg + ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags + %0:gr64 = COPY $rsi + %1:gr64 = DEC64r %0, implicit-def $eflags + ; CMP should be removed. + CMP64ri8 %1, 0, implicit-def $eflags + %2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg + $al = SETCCr 4, implicit $eflags +... +--- +name: opt_zerocmp_user_0 +body: | + bb.0: + ; CHECK-LABEL: name: opt_zerocmp_user_0 + ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: [[NEG32r:%[0-9]+]]:gr32 = NEG32r [[COPY]], implicit-def $eflags + ; CHECK-NEXT: $al = SETCCr 3, implicit $eflags + %0:gr32 = COPY $esi + %1:gr32 = NEG32r %0, implicit-def dead $eflags + ; TEST should be removed. + TEST32rr %0, %0, implicit-def $eflags + $al = SETCCr 4, implicit $eflags +... +--- +name: opt_redundant_flags_0 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_0 + ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[COPY]], [[COPY1]], implicit-def $eflags + ; CHECK-NEXT: $eax = COPY [[SUB32rr]] + ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags + %0:gr32 = COPY $esi + %1:gr32 = COPY $edi + %2:gr32 = SUB32rr %0, %1, implicit-def dead $eflags + $eax = COPY %2 + ; CMP should be removed. + CMP32rr %0, %1, implicit-def $eflags + $bl = SETCCr 2, implicit $eflags +... +--- +name: opt_redundant_flags_1 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_1 + ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[COPY]], [[COPY1]], implicit-def $eflags + ; CHECK-NEXT: $eax = COPY [[SUB32rr]] + ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags + %0:gr32 = COPY $esi + %1:gr32 = COPY $edi + %2:gr32 = SUB32rr %0, %1, implicit-def dead $eflags + $eax = COPY %2 + ; CMP should be removed. + CMP32rr %1, %0, implicit-def $eflags + $bl = SETCCr 3, implicit $eflags +... +--- +name: opt_redundant_flags_2 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_2 + ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[COPY]], [[COPY1]], implicit-def $eflags + ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags + ; CHECK-NEXT: $eax = COPY [[SUB32rr]] + ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags + %0:gr32 = COPY $esi + %1:gr32 = COPY $edi + %2:gr32 = SUB32rr %0, %1, implicit-def $eflags + ; an extra eflags reader shouldn't stop optimization. + $cl = SETCCr 2, implicit $eflags + $eax = COPY %2 + CMP32rr %0, %1, implicit-def $eflags + $bl = SETCCr 2, implicit $eflags +...