forked from OSchip/llvm-project
X86InstrInfo: Look across basic blocks in optimizeCompareInstr
This extends `optimizeCompareInstr` to continue the backwards search when it reached the beginning of a basic block. If there is a single predecessor block then we can just continue the search in that block and mark the EFLAGS register as live-in. Differential Revision: https://reviews.llvm.org/D110862
This commit is contained in:
parent
683994c863
commit
4b75d674f8
|
@ -4295,65 +4295,75 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
|
||||
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
|
||||
assert(SrcRegDef && "Must have a definition (SSA)");
|
||||
for (MachineInstr &Inst : make_range(From, CmpMBB.rend())) {
|
||||
// Try to use EFLAGS from the instruction defining %SrcReg. Example:
|
||||
// %eax = addl ...
|
||||
// ... // EFLAGS not changed
|
||||
// testl %eax, %eax // <-- can be removed
|
||||
if (&Inst == SrcRegDef) {
|
||||
if (IsCmpZero && isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
|
||||
MI = &Inst;
|
||||
break;
|
||||
}
|
||||
// Cannot find other candidates before definition of SrcReg.
|
||||
return false;
|
||||
}
|
||||
|
||||
if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
|
||||
// Try to use EFLAGS produced by an instruction reading %SrcReg. Example:
|
||||
// %eax = ...
|
||||
// ...
|
||||
// popcntl %eax
|
||||
// ... // EFLAGS not changed
|
||||
// testl %eax, %eax // <-- can be removed
|
||||
if (IsCmpZero) {
|
||||
NewCC = isUseDefConvertible(Inst);
|
||||
if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
|
||||
Inst.getOperand(1).getReg() == SrcReg) {
|
||||
ShouldUpdateCC = true;
|
||||
for (MachineBasicBlock *MBB = &CmpMBB;;) {
|
||||
for (MachineInstr &Inst : make_range(From, MBB->rend())) {
|
||||
// Try to use EFLAGS from the instruction defining %SrcReg. Example:
|
||||
// %eax = addl ...
|
||||
// ... // EFLAGS not changed
|
||||
// testl %eax, %eax // <-- can be removed
|
||||
if (&Inst == SrcRegDef) {
|
||||
if (IsCmpZero &&
|
||||
isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
|
||||
MI = &Inst;
|
||||
break;
|
||||
}
|
||||
// Cannot find other candidates before definition of SrcReg.
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to use EFLAGS from an instruction with similar flag results.
|
||||
// Example:
|
||||
// sub x, y or cmp x, y
|
||||
// ... // EFLAGS not changed
|
||||
// cmp x, y // <-- can be removed
|
||||
if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
|
||||
CmpValue, Inst)) {
|
||||
Sub = &Inst;
|
||||
break;
|
||||
}
|
||||
if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
|
||||
// Try to use EFLAGS produced by an instruction reading %SrcReg.
|
||||
// Example:
|
||||
// %eax = ...
|
||||
// ...
|
||||
// popcntl %eax
|
||||
// ... // EFLAGS not changed
|
||||
// testl %eax, %eax // <-- can be removed
|
||||
if (IsCmpZero) {
|
||||
NewCC = isUseDefConvertible(Inst);
|
||||
if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
|
||||
Inst.getOperand(1).getReg() == SrcReg) {
|
||||
ShouldUpdateCC = true;
|
||||
MI = &Inst;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// MOV32r0 is implemented with xor which clobbers condition code. It is
|
||||
// safe to move up, if the definition to EFLAGS is dead and earlier
|
||||
// instructions do not read or write EFLAGS.
|
||||
if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
|
||||
Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
|
||||
Movr0Inst = &Inst;
|
||||
continue;
|
||||
}
|
||||
// Try to use EFLAGS from an instruction with similar flag results.
|
||||
// Example:
|
||||
// sub x, y or cmp x, y
|
||||
// ... // EFLAGS not changed
|
||||
// cmp x, y // <-- can be removed
|
||||
if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2,
|
||||
CmpMask, CmpValue, Inst)) {
|
||||
Sub = &Inst;
|
||||
break;
|
||||
}
|
||||
|
||||
// Cannot do anything for any other EFLAG changes.
|
||||
return false;
|
||||
// MOV32r0 is implemented with xor which clobbers condition code. It is
|
||||
// safe to move up, if the definition to EFLAGS is dead and earlier
|
||||
// instructions do not read or write EFLAGS.
|
||||
if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
|
||||
Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
|
||||
Movr0Inst = &Inst;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Cannot do anything for any other EFLAG changes.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return false if no candidates exist.
|
||||
if (!MI && !Sub)
|
||||
return false;
|
||||
if (MI || Sub)
|
||||
break;
|
||||
|
||||
// Reached begin of basic block. Continue in predecessor if there is
|
||||
// exactly one.
|
||||
if (MBB->pred_size() != 1)
|
||||
return false;
|
||||
MBB = *MBB->pred_begin();
|
||||
From = MBB->rbegin();
|
||||
}
|
||||
|
||||
bool IsSwapped =
|
||||
(SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
|
||||
|
@ -4459,8 +4469,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
|
||||
// The instruction to be updated is either Sub or MI.
|
||||
Sub = IsCmpZero ? MI : Sub;
|
||||
MachineBasicBlock *SubBB = Sub->getParent();
|
||||
// Move Movr0Inst to the appropriate place before Sub.
|
||||
if (Movr0Inst) {
|
||||
// Only move within the same block so we don't accidentally move to a
|
||||
// block with higher execution frequency.
|
||||
if (&CmpMBB != SubBB)
|
||||
return false;
|
||||
// Look backwards until we find a def that doesn't use the current EFLAGS.
|
||||
MachineBasicBlock::reverse_iterator InsertI = Sub,
|
||||
InsertE = Sub->getParent()->rend();
|
||||
|
@ -4468,7 +4483,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
MachineInstr *Instr = &*InsertI;
|
||||
if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
|
||||
Instr->modifiesRegister(X86::EFLAGS, TRI)) {
|
||||
Sub->getParent()->remove(Movr0Inst);
|
||||
Movr0Inst->getParent()->remove(Movr0Inst);
|
||||
Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
|
||||
Movr0Inst);
|
||||
break;
|
||||
|
@ -4490,6 +4505,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
|
||||
.setImm(Op.second);
|
||||
}
|
||||
// Add EFLAGS to block live-ins between CmpBB and block of flags producer.
|
||||
for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
|
||||
MBB = *MBB->pred_begin()) {
|
||||
assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
|
||||
if (!MBB->isLiveIn(X86::EFLAGS))
|
||||
MBB->addLiveIn(X86::EFLAGS);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -130,20 +130,21 @@ define i32 @func_m(i32 %a, i32 %b) nounwind {
|
|||
ret i32 %cond
|
||||
}
|
||||
|
||||
; If EFLAGS is live-out, we can't remove cmp if there exists
|
||||
; a swapped sub.
|
||||
; (This used to test that an unsafe removal of cmp in bb.0 is not happening,
|
||||
; but now we can do so safely).
|
||||
define i32 @func_l2(i32 %a, i32 %b) nounwind {
|
||||
; CHECK-LABEL: func_l2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movl %ecx, %eax
|
||||
; CHECK-NEXT: subl %edx, %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl %eax, %ecx
|
||||
; CHECK-NEXT: subl %edx, %ecx
|
||||
; CHECK-NEXT: jne .LBB8_2
|
||||
; CHECK-NEXT: # %bb.1: # %if.then
|
||||
; CHECK-NEXT: cmpl %ecx, %edx
|
||||
; CHECK-NEXT: cmovlel %ecx, %eax
|
||||
; CHECK-NEXT: cmovll %ecx, %eax
|
||||
; CHECK-NEXT: retl
|
||||
; CHECK-NEXT: .LBB8_2: # %if.else
|
||||
; CHECK-NEXT: movl %ecx, %eax
|
||||
; CHECK-NEXT: retl
|
||||
%cmp = icmp eq i32 %b, %a
|
||||
%sub = sub nsw i32 %a, %b
|
||||
|
|
|
@ -32,6 +32,108 @@ body: |
|
|||
$al = SETCCr 4, implicit $eflags
|
||||
...
|
||||
---
|
||||
name: opt_multiple_blocks
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: opt_multiple_blocks
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
|
||||
; CHECK-NEXT: [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
|
||||
; CHECK-NEXT: PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
|
||||
; CHECK-NEXT: $rcx = POP64r implicit-def $rsp, implicit $rsp
|
||||
; CHECK-NEXT: JMP_1 %bb.1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: liveins: $eflags
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
|
||||
; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
|
||||
bb.0:
|
||||
%0:gr64 = COPY undef $rsi
|
||||
%1:gr64 = INC64r %0, implicit-def $eflags
|
||||
PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
|
||||
$rcx = POP64r implicit-def $rsp, implicit $rsp
|
||||
JMP_1 %bb.1
|
||||
|
||||
bb.1:
|
||||
; TEST should be removed.
|
||||
TEST64rr %1, %1, implicit-def $eflags
|
||||
%2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
|
||||
$al = SETCCr 4, implicit $eflags
|
||||
...
|
||||
---
|
||||
name: opt_multiple_blocks_noopt_0
|
||||
body: |
|
||||
; CHECK-LABEL: name: opt_multiple_blocks_noopt_0
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
|
||||
; CHECK-NEXT: [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
|
||||
; CHECK-NEXT: JMP_1 %bb.1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: TEST64rr [[INC64r]], [[INC64r]], implicit-def $eflags
|
||||
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
|
||||
; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
|
||||
; CHECK-NEXT: JCC_1 %bb.1, 2, implicit $eflags
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
bb.0:
|
||||
%0:gr64 = COPY undef $rsi
|
||||
%1:gr64 = INC64r %0, implicit-def $eflags
|
||||
JMP_1 %bb.1
|
||||
|
||||
bb.1:
|
||||
; The TEST64rr should not be removed, since there are multiple preds.
|
||||
TEST64rr %1, %1, implicit-def $eflags
|
||||
%2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
|
||||
$al = SETCCr 4, implicit $eflags
|
||||
JCC_1 %bb.1, 2, implicit $eflags
|
||||
|
||||
bb.2:
|
||||
...
|
||||
---
|
||||
name: opt_multiple_blocks_noopt_1
|
||||
body: |
|
||||
; CHECK-LABEL: name: opt_multiple_blocks_noopt_1
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: JMP_1 %bb.1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
|
||||
; CHECK-NEXT: TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
|
||||
; CHECK-NEXT: JCC_1 %bb.1, 2, implicit $eflags
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
|
||||
; CHECK-NEXT: TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
|
||||
; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
|
||||
bb.0:
|
||||
JMP_1 %bb.1
|
||||
|
||||
bb.1:
|
||||
%0:gr64 = COPY undef $rsi
|
||||
TEST64rr %0, %0, implicit-def $eflags
|
||||
JCC_1 %bb.1, 2, implicit $eflags
|
||||
|
||||
bb.2:
|
||||
; We should not move MOV32r0 up into the loop (that would be correct but
|
||||
; slow).
|
||||
%1:gr32 = MOV32r0 implicit-def $eflags
|
||||
; TEST should not be removed because of MOV32r0.
|
||||
TEST64rr %0, %0, implicit-def $eflags
|
||||
$al = SETCCr 4, implicit $eflags
|
||||
...
|
||||
---
|
||||
name: opt_zerocmp_user_0
|
||||
body: |
|
||||
bb.0:
|
||||
|
|
Loading…
Reference in New Issue