X86InstrInfo: Look across basic blocks in optimizeCompareInstr

This extends `optimizeCompareInstr` to continue the backwards search
when it reaches the beginning of a basic block. If there is a single
predecessor block, we can simply continue the search in that block and
mark the EFLAGS register as live-in.

Differential Revision: https://reviews.llvm.org/D110862
Matthias Braun 2021-09-24 13:41:54 -07:00
parent 683994c863
commit 4b75d674f8
3 changed files with 183 additions and 58 deletions
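In essence, the change below turns the single backwards scan over the compare's own block into a walk up a chain of single-predecessor blocks, and then records EFLAGS as live-in on every block the flags value now has to cross. The following condensed sketch illustrates only that pattern; the helper names findFlagsProducer and markEflagsLiveIn are hypothetical, the candidate test is heavily simplified, and the real code in the diff additionally handles the SrcReg definition, convertible defs/uses, redundant flag instructions, and MOV32r0.

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include <iterator>
// X86::EFLAGS comes from the X86 target's generated register definitions.

using namespace llvm;

// Walk backwards from CmpInstr looking for an instruction that writes EFLAGS.
// When the top of a block is reached, continue in its predecessor, but only
// if there is exactly one.
static MachineInstr *findFlagsProducer(MachineInstr &CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  MachineBasicBlock *MBB = CmpInstr.getParent();
  MachineBasicBlock::reverse_iterator From =
      std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
  for (;;) {
    for (MachineInstr &Inst : make_range(From, MBB->rend()))
      if (Inst.modifiesRegister(X86::EFLAGS, TRI))
        return &Inst; // Simplified: the patch also checks convertibility etc.
    // Reached the beginning of the block; give up unless the flags state on
    // entry is unambiguous, i.e. there is exactly one predecessor.
    if (MBB->pred_size() != 1)
      return nullptr;
    MBB = *MBB->pred_begin();
    From = MBB->rbegin();
  }
}

// Once a producer in another block is used, EFLAGS must be marked live-in on
// every block between the compare and that producer. Each block on the path
// has exactly one predecessor by construction of the search above.
static void markEflagsLiveIn(MachineBasicBlock &CmpMBB,
                             MachineBasicBlock &ProducerMBB) {
  for (MachineBasicBlock *MBB = &CmpMBB; MBB != &ProducerMBB;
       MBB = *MBB->pred_begin())
    if (!MBB->isLiveIn(X86::EFLAGS))
      MBB->addLiveIn(X86::EFLAGS);
}

Restricting the walk to single-predecessor chains keeps the analysis simple and sidesteps reasoning about merged flag states at control-flow joins, at the cost of missing candidates that sit behind such joins.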


@@ -4295,65 +4295,75 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
assert(SrcRegDef && "Must have a definition (SSA)");
for (MachineInstr &Inst : make_range(From, CmpMBB.rend())) {
// Try to use EFLAGS from the instruction defining %SrcReg. Example:
// %eax = addl ...
// ... // EFLAGS not changed
// testl %eax, %eax // <-- can be removed
if (&Inst == SrcRegDef) {
if (IsCmpZero && isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
MI = &Inst;
break;
}
// Cannot find other candidates before definition of SrcReg.
return false;
}
if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
// Try to use EFLAGS produced by an instruction reading %SrcReg. Example:
// %eax = ...
// ...
// popcntl %eax
// ... // EFLAGS not changed
// testl %eax, %eax // <-- can be removed
if (IsCmpZero) {
NewCC = isUseDefConvertible(Inst);
if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
Inst.getOperand(1).getReg() == SrcReg) {
ShouldUpdateCC = true;
for (MachineBasicBlock *MBB = &CmpMBB;;) {
for (MachineInstr &Inst : make_range(From, MBB->rend())) {
// Try to use EFLAGS from the instruction defining %SrcReg. Example:
// %eax = addl ...
// ... // EFLAGS not changed
// testl %eax, %eax // <-- can be removed
if (&Inst == SrcRegDef) {
if (IsCmpZero &&
isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
MI = &Inst;
break;
}
// Cannot find other candidates before definition of SrcReg.
return false;
}
// Try to use EFLAGS from an instruction with similar flag results.
// Example:
// sub x, y or cmp x, y
// ... // EFLAGS not changed
// cmp x, y // <-- can be removed
if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
CmpValue, Inst)) {
Sub = &Inst;
break;
}
if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
// Try to use EFLAGS produced by an instruction reading %SrcReg.
// Example:
// %eax = ...
// ...
// popcntl %eax
// ... // EFLAGS not changed
// testl %eax, %eax // <-- can be removed
if (IsCmpZero) {
NewCC = isUseDefConvertible(Inst);
if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
Inst.getOperand(1).getReg() == SrcReg) {
ShouldUpdateCC = true;
MI = &Inst;
break;
}
}
// MOV32r0 is implemented with xor which clobbers condition code. It is
// safe to move up, if the definition to EFLAGS is dead and earlier
// instructions do not read or write EFLAGS.
if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
Movr0Inst = &Inst;
continue;
}
// Try to use EFLAGS from an instruction with similar flag results.
// Example:
// sub x, y or cmp x, y
// ... // EFLAGS not changed
// cmp x, y // <-- can be removed
if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2,
CmpMask, CmpValue, Inst)) {
Sub = &Inst;
break;
}
// Cannot do anything for any other EFLAG changes.
return false;
// MOV32r0 is implemented with xor which clobbers condition code. It is
// safe to move up, if the definition to EFLAGS is dead and earlier
// instructions do not read or write EFLAGS.
if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
Movr0Inst = &Inst;
continue;
}
// Cannot do anything for any other EFLAG changes.
return false;
}
}
}
// Return false if no candidates exist.
if (!MI && !Sub)
return false;
if (MI || Sub)
break;
// Reached begin of basic block. Continue in predecessor if there is
// exactly one.
if (MBB->pred_size() != 1)
return false;
MBB = *MBB->pred_begin();
From = MBB->rbegin();
}
bool IsSwapped =
(SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
@@ -4459,8 +4469,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// The instruction to be updated is either Sub or MI.
Sub = IsCmpZero ? MI : Sub;
MachineBasicBlock *SubBB = Sub->getParent();
// Move Movr0Inst to the appropriate place before Sub.
if (Movr0Inst) {
// Only move within the same block so we don't accidentally move to a
// block with higher execution frequency.
if (&CmpMBB != SubBB)
return false;
// Look backwards until we find a def that doesn't use the current EFLAGS.
MachineBasicBlock::reverse_iterator InsertI = Sub,
InsertE = Sub->getParent()->rend();
@@ -4468,7 +4483,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
MachineInstr *Instr = &*InsertI;
if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
Instr->modifiesRegister(X86::EFLAGS, TRI)) {
Sub->getParent()->remove(Movr0Inst);
Movr0Inst->getParent()->remove(Movr0Inst);
Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
Movr0Inst);
break;
@@ -4490,6 +4505,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
.setImm(Op.second);
}
// Add EFLAGS to block live-ins between CmpBB and block of flags producer.
for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
MBB = *MBB->pred_begin()) {
assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
if (!MBB->isLiveIn(X86::EFLAGS))
MBB->addLiveIn(X86::EFLAGS);
}
return true;
}


@@ -130,20 +130,21 @@ define i32 @func_m(i32 %a, i32 %b) nounwind {
ret i32 %cond
}
; If EFLAGS is live-out, we can't remove cmp if there exists
; a swapped sub.
; (This used to test that an unsafe removal of cmp in bb.0 is not happening,
; but now we can do so safely).
define i32 @func_l2(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: func_l2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: subl %edx, %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: subl %edx, %ecx
; CHECK-NEXT: jne .LBB8_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: cmpl %ecx, %edx
; CHECK-NEXT: cmovlel %ecx, %eax
; CHECK-NEXT: cmovll %ecx, %eax
; CHECK-NEXT: retl
; CHECK-NEXT: .LBB8_2: # %if.else
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retl
%cmp = icmp eq i32 %b, %a
%sub = sub nsw i32 %a, %b


@@ -32,6 +32,108 @@ body: |
$al = SETCCr 4, implicit $eflags
...
---
name: opt_multiple_blocks
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: opt_multiple_blocks
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
; CHECK-NEXT: [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
; CHECK-NEXT: PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
; CHECK-NEXT: $rcx = POP64r implicit-def $rsp, implicit $rsp
; CHECK-NEXT: JMP_1 %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
bb.0:
%0:gr64 = COPY undef $rsi
%1:gr64 = INC64r %0, implicit-def $eflags
PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
$rcx = POP64r implicit-def $rsp, implicit $rsp
JMP_1 %bb.1
bb.1:
; TEST should be removed.
TEST64rr %1, %1, implicit-def $eflags
%2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
$al = SETCCr 4, implicit $eflags
...
---
name: opt_multiple_blocks_noopt_0
body: |
; CHECK-LABEL: name: opt_multiple_blocks_noopt_0
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
; CHECK-NEXT: [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
; CHECK-NEXT: JMP_1 %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: TEST64rr [[INC64r]], [[INC64r]], implicit-def $eflags
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
; CHECK-NEXT: JCC_1 %bb.1, 2, implicit $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
bb.0:
%0:gr64 = COPY undef $rsi
%1:gr64 = INC64r %0, implicit-def $eflags
JMP_1 %bb.1
bb.1:
; The TEST64rr should not be removed, since there are multiple preds.
TEST64rr %1, %1, implicit-def $eflags
%2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
$al = SETCCr 4, implicit $eflags
JCC_1 %bb.1, 2, implicit $eflags
bb.2:
...
---
name: opt_multiple_blocks_noopt_1
body: |
; CHECK-LABEL: name: opt_multiple_blocks_noopt_1
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: JMP_1 %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
; CHECK-NEXT: TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
; CHECK-NEXT: JCC_1 %bb.1, 2, implicit $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
; CHECK-NEXT: TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
bb.0:
JMP_1 %bb.1
bb.1:
%0:gr64 = COPY undef $rsi
TEST64rr %0, %0, implicit-def $eflags
JCC_1 %bb.1, 2, implicit $eflags
bb.2:
; We should not move MOV32r0 up into the loop (that would be correct but
; slow).
%1:gr32 = MOV32r0 implicit-def $eflags
; TEST should not be removed because of MOV32r0.
TEST64rr %0, %0, implicit-def $eflags
$al = SETCCr 4, implicit $eflags
...
---
name: opt_zerocmp_user_0
body: |
bb.0: