X86InstrInfo: Look across basic blocks in optimizeCompareInstr

This extends `optimizeCompareInstr` to continue the backwards search
when it reached the beginning of a basic block. If there is a single
predecessor block then we can just continue the search in that block and
mark the EFLAGS register as live-in.

Differential Revision: https://reviews.llvm.org/D110862
This commit is contained in:
Matthias Braun 2021-09-24 13:41:54 -07:00
parent 683994c863
commit 4b75d674f8
3 changed files with 183 additions and 58 deletions

View File

@ -4295,65 +4295,75 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
std::next(MachineBasicBlock::reverse_iterator(CmpInstr)); std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg); MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
assert(SrcRegDef && "Must have a definition (SSA)"); assert(SrcRegDef && "Must have a definition (SSA)");
for (MachineInstr &Inst : make_range(From, CmpMBB.rend())) { for (MachineBasicBlock *MBB = &CmpMBB;;) {
// Try to use EFLAGS from the instruction defining %SrcReg. Example: for (MachineInstr &Inst : make_range(From, MBB->rend())) {
// %eax = addl ... // Try to use EFLAGS from the instruction defining %SrcReg. Example:
// ... // EFLAGS not changed // %eax = addl ...
// testl %eax, %eax // <-- can be removed // ... // EFLAGS not changed
if (&Inst == SrcRegDef) { // testl %eax, %eax // <-- can be removed
if (IsCmpZero && isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) { if (&Inst == SrcRegDef) {
MI = &Inst; if (IsCmpZero &&
break; isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
}
// Cannot find other candidates before definition of SrcReg.
return false;
}
if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
// Try to use EFLAGS produced by an instruction reading %SrcReg. Example:
// %eax = ...
// ...
// popcntl %eax
// ... // EFLAGS not changed
// testl %eax, %eax // <-- can be removed
if (IsCmpZero) {
NewCC = isUseDefConvertible(Inst);
if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
Inst.getOperand(1).getReg() == SrcReg) {
ShouldUpdateCC = true;
MI = &Inst; MI = &Inst;
break; break;
} }
// Cannot find other candidates before definition of SrcReg.
return false;
} }
// Try to use EFLAGS from an instruction with similar flag results. if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
// Example: // Try to use EFLAGS produced by an instruction reading %SrcReg.
// sub x, y or cmp x, y // Example:
// ... // EFLAGS not changed // %eax = ...
// cmp x, y // <-- can be removed // ...
if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, // popcntl %eax
CmpValue, Inst)) { // ... // EFLAGS not changed
Sub = &Inst; // testl %eax, %eax // <-- can be removed
break; if (IsCmpZero) {
} NewCC = isUseDefConvertible(Inst);
if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
Inst.getOperand(1).getReg() == SrcReg) {
ShouldUpdateCC = true;
MI = &Inst;
break;
}
}
// MOV32r0 is implemented with xor which clobbers condition code. It is // Try to use EFLAGS from an instruction with similar flag results.
// safe to move up, if the definition to EFLAGS is dead and earlier // Example:
// instructions do not read or write EFLAGS. // sub x, y or cmp x, y
if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 && // ... // EFLAGS not changed
Inst.registerDefIsDead(X86::EFLAGS, TRI)) { // cmp x, y // <-- can be removed
Movr0Inst = &Inst; if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2,
continue; CmpMask, CmpValue, Inst)) {
} Sub = &Inst;
break;
}
// Cannot do anything for any other EFLAG changes. // MOV32r0 is implemented with xor which clobbers condition code. It is
return false; // safe to move up, if the definition to EFLAGS is dead and earlier
// instructions do not read or write EFLAGS.
if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
Movr0Inst = &Inst;
continue;
}
// Cannot do anything for any other EFLAG changes.
return false;
}
} }
}
// Return false if no candidates exist. if (MI || Sub)
if (!MI && !Sub) break;
return false;
// Reached begin of basic block. Continue in predecessor if there is
// exactly one.
if (MBB->pred_size() != 1)
return false;
MBB = *MBB->pred_begin();
From = MBB->rbegin();
}
bool IsSwapped = bool IsSwapped =
(SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 && (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
@ -4459,8 +4469,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// The instruction to be updated is either Sub or MI. // The instruction to be updated is either Sub or MI.
Sub = IsCmpZero ? MI : Sub; Sub = IsCmpZero ? MI : Sub;
MachineBasicBlock *SubBB = Sub->getParent();
// Move Movr0Inst to the appropriate place before Sub. // Move Movr0Inst to the appropriate place before Sub.
if (Movr0Inst) { if (Movr0Inst) {
// Only move within the same block so we don't accidentally move to a
// block with higher execution frequency.
if (&CmpMBB != SubBB)
return false;
// Look backwards until we find a def that doesn't use the current EFLAGS. // Look backwards until we find a def that doesn't use the current EFLAGS.
MachineBasicBlock::reverse_iterator InsertI = Sub, MachineBasicBlock::reverse_iterator InsertI = Sub,
InsertE = Sub->getParent()->rend(); InsertE = Sub->getParent()->rend();
@ -4468,7 +4483,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
MachineInstr *Instr = &*InsertI; MachineInstr *Instr = &*InsertI;
if (!Instr->readsRegister(X86::EFLAGS, TRI) && if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
Instr->modifiesRegister(X86::EFLAGS, TRI)) { Instr->modifiesRegister(X86::EFLAGS, TRI)) {
Sub->getParent()->remove(Movr0Inst); Movr0Inst->getParent()->remove(Movr0Inst);
Instr->getParent()->insert(MachineBasicBlock::iterator(Instr), Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
Movr0Inst); Movr0Inst);
break; break;
@ -4490,6 +4505,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1) Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
.setImm(Op.second); .setImm(Op.second);
} }
// Add EFLAGS to block live-ins between CmpBB and block of flags producer.
for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
MBB = *MBB->pred_begin()) {
assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
if (!MBB->isLiveIn(X86::EFLAGS))
MBB->addLiveIn(X86::EFLAGS);
}
return true; return true;
} }

View File

@ -130,20 +130,21 @@ define i32 @func_m(i32 %a, i32 %b) nounwind {
ret i32 %cond ret i32 %cond
} }
; If EFLAGS is live-out, we can't remove cmp if there exists ; (This used to test that an unsafe removal of cmp in bb.0 is not happening,
; a swapped sub. ; but now we can do so safely).
define i32 @func_l2(i32 %a, i32 %b) nounwind { define i32 @func_l2(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: func_l2: ; CHECK-LABEL: func_l2:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: subl %edx, %eax ; CHECK-NEXT: subl %edx, %ecx
; CHECK-NEXT: jne .LBB8_2 ; CHECK-NEXT: jne .LBB8_2
; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: cmpl %ecx, %edx ; CHECK-NEXT: cmovll %ecx, %eax
; CHECK-NEXT: cmovlel %ecx, %eax ; CHECK-NEXT: retl
; CHECK-NEXT: .LBB8_2: # %if.else ; CHECK-NEXT: .LBB8_2: # %if.else
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retl ; CHECK-NEXT: retl
%cmp = icmp eq i32 %b, %a %cmp = icmp eq i32 %b, %a
%sub = sub nsw i32 %a, %b %sub = sub nsw i32 %a, %b

View File

@ -32,6 +32,108 @@ body: |
$al = SETCCr 4, implicit $eflags $al = SETCCr 4, implicit $eflags
... ...
--- ---
name: opt_multiple_blocks
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: opt_multiple_blocks
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
; CHECK-NEXT: [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
; CHECK-NEXT: PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
; CHECK-NEXT: $rcx = POP64r implicit-def $rsp, implicit $rsp
; CHECK-NEXT: JMP_1 %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
bb.0:
%0:gr64 = COPY undef $rsi
%1:gr64 = INC64r %0, implicit-def $eflags
PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
$rcx = POP64r implicit-def $rsp, implicit $rsp
JMP_1 %bb.1
bb.1:
; TEST should be removed.
TEST64rr %1, %1, implicit-def $eflags
%2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
$al = SETCCr 4, implicit $eflags
...
---
name: opt_multiple_blocks_noopt_0
body: |
; CHECK-LABEL: name: opt_multiple_blocks_noopt_0
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
; CHECK-NEXT: [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
; CHECK-NEXT: JMP_1 %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: TEST64rr [[INC64r]], [[INC64r]], implicit-def $eflags
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
; CHECK-NEXT: JCC_1 %bb.1, 2, implicit $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
bb.0:
%0:gr64 = COPY undef $rsi
%1:gr64 = INC64r %0, implicit-def $eflags
JMP_1 %bb.1
bb.1:
; The TEST64rr should not be removed, since there are multiple preds.
TEST64rr %1, %1, implicit-def $eflags
%2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
$al = SETCCr 4, implicit $eflags
JCC_1 %bb.1, 2, implicit $eflags
bb.2:
...
---
name: opt_multiple_blocks_noopt_1
body: |
; CHECK-LABEL: name: opt_multiple_blocks_noopt_1
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: JMP_1 %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
; CHECK-NEXT: TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
; CHECK-NEXT: JCC_1 %bb.1, 2, implicit $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
; CHECK-NEXT: TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
bb.0:
JMP_1 %bb.1
bb.1:
%0:gr64 = COPY undef $rsi
TEST64rr %0, %0, implicit-def $eflags
JCC_1 %bb.1, 2, implicit $eflags
bb.2:
; We should not move MOV32r0 up into the loop (that would be correct but
; slow).
%1:gr32 = MOV32r0 implicit-def $eflags
; TEST should not be removed because of MOV32r0.
TEST64rr %0, %0, implicit-def $eflags
$al = SETCCr 4, implicit $eflags
...
---
name: opt_zerocmp_user_0 name: opt_zerocmp_user_0
body: | body: |
bb.0: bb.0: