forked from OSchip/llvm-project
X86InstrInfo: Optimize more combinations of SUB+CMP
`X86InstrInfo::optimizeCompareInstr` would only optimize a `SUB` followed by a `CMP` in `isRedundantFlagInstr`. This extends the code to also look for other combinations like `CMP`+`CMP`, `TEST`+`TEST`, `SUB x,0`+`TEST`. - Change `isRedundantFlagInstr` to run `analyzeCompareInstr` on the candidate instruction and compare the results. This normalizes things and gives consistent results for various comparisons (`CMP x, y`, `SUB x, y`) and immediate cases (`TEST x, x`, `SUB x, 0`, `CMP x, 0`...). - Turn `isRedundantFlagInstr` into a member function so it can call `analyzeCompare`. - We now also check `isRedundantFlagInstr` even if `IsCmpZero` is true, since we now have cases like `TEST`+`TEST`. Differential Revision: https://reviews.llvm.org/D110865
This commit is contained in:
parent
e50f02ba7e
commit
97a1570d8c
|
@ -4011,42 +4011,72 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Check whether the first instruction, whose only
|
||||
/// purpose is to update flags, can be made redundant.
|
||||
/// CMPrr can be made redundant by SUBrr if the operands are the same.
|
||||
/// This function can be extended later on.
|
||||
/// SrcReg, SrcRegs: register operands for FlagI.
|
||||
/// ImmValue: immediate for FlagI if it takes an immediate.
|
||||
inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
|
||||
bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
|
||||
Register SrcReg, Register SrcReg2,
|
||||
int64_t ImmMask, int64_t ImmValue,
|
||||
const MachineInstr &OI) {
|
||||
if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
|
||||
(FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
|
||||
(FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
|
||||
(FlagI.getOpcode() == X86::CMP8rr && OI.getOpcode() == X86::SUB8rr)) &&
|
||||
((OI.getOperand(1).getReg() == SrcReg &&
|
||||
OI.getOperand(2).getReg() == SrcReg2) ||
|
||||
(OI.getOperand(1).getReg() == SrcReg2 &&
|
||||
OI.getOperand(2).getReg() == SrcReg)))
|
||||
return true;
|
||||
|
||||
if (ImmMask != 0 &&
|
||||
((FlagI.getOpcode() == X86::CMP64ri32 &&
|
||||
OI.getOpcode() == X86::SUB64ri32) ||
|
||||
(FlagI.getOpcode() == X86::CMP64ri8 &&
|
||||
OI.getOpcode() == X86::SUB64ri8) ||
|
||||
(FlagI.getOpcode() == X86::CMP32ri && OI.getOpcode() == X86::SUB32ri) ||
|
||||
(FlagI.getOpcode() == X86::CMP32ri8 &&
|
||||
OI.getOpcode() == X86::SUB32ri8) ||
|
||||
(FlagI.getOpcode() == X86::CMP16ri && OI.getOpcode() == X86::SUB16ri) ||
|
||||
(FlagI.getOpcode() == X86::CMP16ri8 &&
|
||||
OI.getOpcode() == X86::SUB16ri8) ||
|
||||
(FlagI.getOpcode() == X86::CMP8ri && OI.getOpcode() == X86::SUB8ri)) &&
|
||||
OI.getOperand(1).getReg() == SrcReg &&
|
||||
OI.getOperand(2).getImm() == ImmValue)
|
||||
return true;
|
||||
const MachineInstr &OI,
|
||||
bool *IsSwapped) const {
|
||||
switch (OI.getOpcode()) {
|
||||
case X86::CMP64rr:
|
||||
case X86::CMP32rr:
|
||||
case X86::CMP16rr:
|
||||
case X86::CMP8rr:
|
||||
case X86::SUB64rr:
|
||||
case X86::SUB32rr:
|
||||
case X86::SUB16rr:
|
||||
case X86::SUB8rr: {
|
||||
Register OISrcReg;
|
||||
Register OISrcReg2;
|
||||
int64_t OIMask;
|
||||
int64_t OIValue;
|
||||
if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
|
||||
OIMask != ImmMask || OIValue != ImmValue)
|
||||
return false;
|
||||
if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
|
||||
*IsSwapped = false;
|
||||
return true;
|
||||
}
|
||||
if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
|
||||
*IsSwapped = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case X86::CMP64ri32:
|
||||
case X86::CMP64ri8:
|
||||
case X86::CMP32ri:
|
||||
case X86::CMP32ri8:
|
||||
case X86::CMP16ri:
|
||||
case X86::CMP16ri8:
|
||||
case X86::CMP8ri:
|
||||
case X86::SUB64ri32:
|
||||
case X86::SUB64ri8:
|
||||
case X86::SUB32ri:
|
||||
case X86::SUB32ri8:
|
||||
case X86::SUB16ri:
|
||||
case X86::SUB16ri8:
|
||||
case X86::SUB8ri:
|
||||
case X86::TEST64rr:
|
||||
case X86::TEST32rr:
|
||||
case X86::TEST16rr:
|
||||
case X86::TEST8rr: {
|
||||
if (ImmMask != 0) {
|
||||
Register OISrcReg;
|
||||
Register OISrcReg2;
|
||||
int64_t OIMask;
|
||||
int64_t OIValue;
|
||||
if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
|
||||
SrcReg == OISrcReg && ImmMask == OIMask && OIValue == ImmValue) {
|
||||
assert(SrcReg2 == X86::NoRegister && OISrcReg2 == X86::NoRegister &&
|
||||
"should not have 2nd register");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return FlagI.isIdenticalTo(OI);
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Check whether the definition can be converted
|
||||
|
@ -4275,12 +4305,19 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
|
||||
bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
|
||||
|
||||
// Transformation currently requires SSA values.
|
||||
if (SrcReg2.isPhysical())
|
||||
return false;
|
||||
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
|
||||
assert(SrcRegDef && "Must have a definition (SSA)");
|
||||
|
||||
MachineInstr *MI = nullptr;
|
||||
MachineInstr *Sub = nullptr;
|
||||
MachineInstr *Movr0Inst = nullptr;
|
||||
bool NoSignFlag = false;
|
||||
bool ClearsOverflowFlag = false;
|
||||
bool ShouldUpdateCC = false;
|
||||
bool IsSwapped = false;
|
||||
X86::CondCode NewCC = X86::COND_INVALID;
|
||||
|
||||
// Search backward from CmpInstr for the next instruction defining EFLAGS.
|
||||
|
@ -4288,8 +4325,6 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
|
||||
MachineBasicBlock::reverse_iterator From =
|
||||
std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
|
||||
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
|
||||
assert(SrcRegDef && "Must have a definition (SSA)");
|
||||
for (MachineBasicBlock *MBB = &CmpMBB;;) {
|
||||
for (MachineInstr &Inst : make_range(From, MBB->rend())) {
|
||||
// Try to use EFLAGS from the instruction defining %SrcReg. Example:
|
||||
|
@ -4329,8 +4364,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
// sub x, y or cmp x, y
|
||||
// ... // EFLAGS not changed
|
||||
// cmp x, y // <-- can be removed
|
||||
if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2,
|
||||
CmpMask, CmpValue, Inst)) {
|
||||
if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
|
||||
Inst, &IsSwapped)) {
|
||||
Sub = &Inst;
|
||||
break;
|
||||
}
|
||||
|
@ -4360,10 +4395,6 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
From = MBB->rbegin();
|
||||
}
|
||||
|
||||
bool IsSwapped =
|
||||
(SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
|
||||
Sub->getOperand(2).getReg() == SrcReg);
|
||||
|
||||
// Scan forward from the instruction after CmpInstr for uses of EFLAGS.
|
||||
// It is safe to remove CmpInstr if EFLAGS is redefined or killed.
|
||||
// If we are done with the basic block, we need to check whether EFLAGS is
|
||||
|
@ -4386,7 +4417,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
|
||||
// EFLAGS is used by this instruction.
|
||||
X86::CondCode OldCC = X86::COND_INVALID;
|
||||
if (IsCmpZero || IsSwapped) {
|
||||
if (MI || IsSwapped) {
|
||||
// We decode the condition code from opcode.
|
||||
if (Instr.isBranch())
|
||||
OldCC = X86::getCondFromBranch(Instr);
|
||||
|
@ -4398,7 +4429,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
if (OldCC == X86::COND_INVALID) return false;
|
||||
}
|
||||
X86::CondCode ReplacementCC = X86::COND_INVALID;
|
||||
if (IsCmpZero) {
|
||||
if (MI) {
|
||||
switch (OldCC) {
|
||||
default: break;
|
||||
case X86::COND_A: case X86::COND_AE:
|
||||
|
@ -4456,14 +4487,15 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
|
||||
// If EFLAGS is not killed nor re-defined, we should check whether it is
|
||||
// live-out. If it is live-out, do not optimize.
|
||||
if ((IsCmpZero || IsSwapped) && !IsSafe) {
|
||||
if ((MI || IsSwapped) && !IsSafe) {
|
||||
for (MachineBasicBlock *Successor : CmpMBB.successors())
|
||||
if (Successor->isLiveIn(X86::EFLAGS))
|
||||
return false;
|
||||
}
|
||||
|
||||
// The instruction to be updated is either Sub or MI.
|
||||
Sub = IsCmpZero ? MI : Sub;
|
||||
assert((MI == nullptr || Sub == nullptr) && "Should not have Sub and MI set");
|
||||
Sub = MI != nullptr ? MI : Sub;
|
||||
MachineBasicBlock *SubBB = Sub->getParent();
|
||||
// Move Movr0Inst to the appropriate place before Sub.
|
||||
if (Movr0Inst) {
|
||||
|
|
|
@ -626,6 +626,21 @@ private:
|
|||
unsigned &SrcOpIdx1,
|
||||
unsigned &SrcOpIdx2,
|
||||
bool IsIntrinsic = false) const;
|
||||
|
||||
/// Returns true when instruction \p FlagI produces the same flags as \p OI.
|
||||
/// The caller should pass in the results of calling analyzeCompare on \p OI:
|
||||
/// \p SrcReg, \p SrcReg2, \p ImmMask, \p ImmValue.
|
||||
/// If the flags match \p OI as if it had the input operands swapped then the
|
||||
/// function succeeds and sets \p IsSwapped to true.
|
||||
///
|
||||
/// Examples of OI, FlagI pairs returning true:
|
||||
/// CMP %1, 42 and CMP %1, 42
|
||||
/// CMP %1, %2 and %3 = SUB %1, %2
|
||||
/// TEST %1, %1 and %2 = SUB %1, 0
|
||||
/// CMP %1, %2 and %3 = SUB %2, %1 ; IsSwapped=true
|
||||
bool isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg,
|
||||
Register SrcReg2, int64_t ImmMask, int64_t ImmValue,
|
||||
const MachineInstr &OI, bool *IsSwapped) const;
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
|
|
@ -67,7 +67,6 @@ define i16 @main_bb_2E_i9_2E_i_2E_i932_2E_ce(%struct.list* %l_addr.01.0.i2.i.i92
|
|||
; CHECK-NEXT: LBB0_6: ## %NodeBlock
|
||||
; CHECK-NEXT: js LBB0_9
|
||||
; CHECK-NEXT: ## %bb.7: ## %LeafBlock1
|
||||
; CHECK-NEXT: testl %eax, %eax
|
||||
; CHECK-NEXT: jne LBB0_3
|
||||
; CHECK-NEXT: ## %bb.8: ## %bb12.i.i935.exitStub
|
||||
; CHECK-NEXT: movl %edi, (%esi)
|
||||
|
|
|
@ -212,8 +212,7 @@ body: |
|
|||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
|
||||
; CHECK-NEXT: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags
|
||||
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
|
||||
; CHECK-NEXT: CMP32rr [[COPY1]], [[COPY]], implicit-def $eflags
|
||||
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
|
||||
; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
|
||||
%0:gr32 = COPY $esi
|
||||
%1:gr32 = COPY $edi
|
||||
CMP32rr %0, %1, implicit-def $eflags
|
||||
|
@ -231,7 +230,6 @@ body: |
|
|||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK-NEXT: CMP64ri8 [[COPY]], 15, implicit-def $eflags
|
||||
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
|
||||
; CHECK-NEXT: CMP64ri8 [[COPY]], 15, implicit-def $eflags
|
||||
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
|
||||
%0:gr64 = COPY $rsi
|
||||
%1:gr64 = COPY $rdi
|
||||
|
@ -249,7 +247,6 @@ body: |
|
|||
; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $ax
|
||||
; CHECK-NEXT: TEST16rr [[COPY]], [[COPY]], implicit-def $eflags
|
||||
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
|
||||
; CHECK-NEXT: TEST16rr [[COPY]], [[COPY]], implicit-def $eflags
|
||||
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
|
||||
%0:gr16 = COPY $ax
|
||||
TEST16rr %0, %0, implicit-def $eflags
|
||||
|
@ -267,8 +264,7 @@ body: |
|
|||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
|
||||
; CHECK-NEXT: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags
|
||||
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
|
||||
; CHECK-NEXT: CMP32rr [[COPY1]], [[COPY]], implicit-def $eflags
|
||||
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
|
||||
; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
|
||||
%0:gr32 = COPY $esi
|
||||
%1:gr32 = COPY $edi
|
||||
CMP32rr %0, %1, implicit-def $eflags
|
||||
|
@ -285,7 +281,6 @@ body: |
|
|||
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
|
||||
; CHECK-NEXT: CMP32ri [[COPY]], -12345, implicit-def $eflags
|
||||
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
|
||||
; CHECK-NEXT: CMP32ri [[COPY]], -12345, implicit-def $eflags
|
||||
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
|
||||
%0:gr32 = COPY $esi
|
||||
CMP32ri %0, -12345, implicit-def $eflags
|
||||
|
@ -323,7 +318,6 @@ body: |
|
|||
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
|
||||
; CHECK-NEXT: CMP32ri8 [[COPY]], 0, implicit-def $eflags
|
||||
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
|
||||
; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags
|
||||
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
|
||||
%0:gr32 = COPY $esi
|
||||
CMP32ri8 %0, 0, implicit-def $eflags
|
||||
|
@ -340,7 +334,6 @@ body: |
|
|||
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
|
||||
; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags
|
||||
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
|
||||
; CHECK-NEXT: CMP32ri8 [[COPY]], 0, implicit-def $eflags
|
||||
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
|
||||
%0:gr32 = COPY $esi
|
||||
TEST32rr %0, %0, implicit-def $eflags
|
||||
|
@ -359,7 +352,6 @@ body: |
|
|||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
|
||||
; CHECK-NEXT: CMP64ri32 [[COPY]], @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags
|
||||
; CHECK-NEXT: $cl = SETCCr 7, implicit $eflags
|
||||
; CHECK-NEXT: CMP64ri32 [[COPY]], @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags
|
||||
; CHECK-NEXT: $cl = SETCCr 3, implicit $eflags
|
||||
%0:gr64 = COPY $rsi
|
||||
CMP64ri32 %0, @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags
|
||||
|
|
|
@ -15,7 +15,6 @@ define fastcc i32 @_Z18yy_get_next_bufferv() nounwind {
|
|||
; CHECK-NEXT: jne .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_3: # %bb158
|
||||
; CHECK-NEXT: movb %al, 0
|
||||
; CHECK-NEXT: cmpl $-1, %eax
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: retl
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue