forked from OSchip/llvm-project
X86InstrInfo: Look across basic blocks in optimizeCompareInstr
This extends `optimizeCompareInstr` to continue the backwards search when it reached the beginning of a basic block. If there is a single predecessor block then we can just continue the search in that block and mark the EFLAGS register as live-in. Differential Revision: https://reviews.llvm.org/D110862
This commit is contained in:
parent
683994c863
commit
4b75d674f8
|
@ -4295,65 +4295,75 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
||||||
std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
|
std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
|
||||||
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
|
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
|
||||||
assert(SrcRegDef && "Must have a definition (SSA)");
|
assert(SrcRegDef && "Must have a definition (SSA)");
|
||||||
for (MachineInstr &Inst : make_range(From, CmpMBB.rend())) {
|
for (MachineBasicBlock *MBB = &CmpMBB;;) {
|
||||||
// Try to use EFLAGS from the instruction defining %SrcReg. Example:
|
for (MachineInstr &Inst : make_range(From, MBB->rend())) {
|
||||||
// %eax = addl ...
|
// Try to use EFLAGS from the instruction defining %SrcReg. Example:
|
||||||
// ... // EFLAGS not changed
|
// %eax = addl ...
|
||||||
// testl %eax, %eax // <-- can be removed
|
// ... // EFLAGS not changed
|
||||||
if (&Inst == SrcRegDef) {
|
// testl %eax, %eax // <-- can be removed
|
||||||
if (IsCmpZero && isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
|
if (&Inst == SrcRegDef) {
|
||||||
MI = &Inst;
|
if (IsCmpZero &&
|
||||||
break;
|
isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
|
||||||
}
|
|
||||||
// Cannot find other candidates before definition of SrcReg.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
|
|
||||||
// Try to use EFLAGS produced by an instruction reading %SrcReg. Example:
|
|
||||||
// %eax = ...
|
|
||||||
// ...
|
|
||||||
// popcntl %eax
|
|
||||||
// ... // EFLAGS not changed
|
|
||||||
// testl %eax, %eax // <-- can be removed
|
|
||||||
if (IsCmpZero) {
|
|
||||||
NewCC = isUseDefConvertible(Inst);
|
|
||||||
if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
|
|
||||||
Inst.getOperand(1).getReg() == SrcReg) {
|
|
||||||
ShouldUpdateCC = true;
|
|
||||||
MI = &Inst;
|
MI = &Inst;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// Cannot find other candidates before definition of SrcReg.
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to use EFLAGS from an instruction with similar flag results.
|
if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
|
||||||
// Example:
|
// Try to use EFLAGS produced by an instruction reading %SrcReg.
|
||||||
// sub x, y or cmp x, y
|
// Example:
|
||||||
// ... // EFLAGS not changed
|
// %eax = ...
|
||||||
// cmp x, y // <-- can be removed
|
// ...
|
||||||
if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
|
// popcntl %eax
|
||||||
CmpValue, Inst)) {
|
// ... // EFLAGS not changed
|
||||||
Sub = &Inst;
|
// testl %eax, %eax // <-- can be removed
|
||||||
break;
|
if (IsCmpZero) {
|
||||||
}
|
NewCC = isUseDefConvertible(Inst);
|
||||||
|
if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
|
||||||
|
Inst.getOperand(1).getReg() == SrcReg) {
|
||||||
|
ShouldUpdateCC = true;
|
||||||
|
MI = &Inst;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// MOV32r0 is implemented with xor which clobbers condition code. It is
|
// Try to use EFLAGS from an instruction with similar flag results.
|
||||||
// safe to move up, if the definition to EFLAGS is dead and earlier
|
// Example:
|
||||||
// instructions do not read or write EFLAGS.
|
// sub x, y or cmp x, y
|
||||||
if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
|
// ... // EFLAGS not changed
|
||||||
Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
|
// cmp x, y // <-- can be removed
|
||||||
Movr0Inst = &Inst;
|
if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2,
|
||||||
continue;
|
CmpMask, CmpValue, Inst)) {
|
||||||
}
|
Sub = &Inst;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Cannot do anything for any other EFLAG changes.
|
// MOV32r0 is implemented with xor which clobbers condition code. It is
|
||||||
return false;
|
// safe to move up, if the definition to EFLAGS is dead and earlier
|
||||||
|
// instructions do not read or write EFLAGS.
|
||||||
|
if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
|
||||||
|
Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
|
||||||
|
Movr0Inst = &Inst;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cannot do anything for any other EFLAG changes.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Return false if no candidates exist.
|
if (MI || Sub)
|
||||||
if (!MI && !Sub)
|
break;
|
||||||
return false;
|
|
||||||
|
// Reached begin of basic block. Continue in predecessor if there is
|
||||||
|
// exactly one.
|
||||||
|
if (MBB->pred_size() != 1)
|
||||||
|
return false;
|
||||||
|
MBB = *MBB->pred_begin();
|
||||||
|
From = MBB->rbegin();
|
||||||
|
}
|
||||||
|
|
||||||
bool IsSwapped =
|
bool IsSwapped =
|
||||||
(SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
|
(SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
|
||||||
|
@ -4459,8 +4469,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
||||||
|
|
||||||
// The instruction to be updated is either Sub or MI.
|
// The instruction to be updated is either Sub or MI.
|
||||||
Sub = IsCmpZero ? MI : Sub;
|
Sub = IsCmpZero ? MI : Sub;
|
||||||
|
MachineBasicBlock *SubBB = Sub->getParent();
|
||||||
// Move Movr0Inst to the appropriate place before Sub.
|
// Move Movr0Inst to the appropriate place before Sub.
|
||||||
if (Movr0Inst) {
|
if (Movr0Inst) {
|
||||||
|
// Only move within the same block so we don't accidentally move to a
|
||||||
|
// block with higher execution frequency.
|
||||||
|
if (&CmpMBB != SubBB)
|
||||||
|
return false;
|
||||||
// Look backwards until we find a def that doesn't use the current EFLAGS.
|
// Look backwards until we find a def that doesn't use the current EFLAGS.
|
||||||
MachineBasicBlock::reverse_iterator InsertI = Sub,
|
MachineBasicBlock::reverse_iterator InsertI = Sub,
|
||||||
InsertE = Sub->getParent()->rend();
|
InsertE = Sub->getParent()->rend();
|
||||||
|
@ -4468,7 +4483,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
||||||
MachineInstr *Instr = &*InsertI;
|
MachineInstr *Instr = &*InsertI;
|
||||||
if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
|
if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
|
||||||
Instr->modifiesRegister(X86::EFLAGS, TRI)) {
|
Instr->modifiesRegister(X86::EFLAGS, TRI)) {
|
||||||
Sub->getParent()->remove(Movr0Inst);
|
Movr0Inst->getParent()->remove(Movr0Inst);
|
||||||
Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
|
Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
|
||||||
Movr0Inst);
|
Movr0Inst);
|
||||||
break;
|
break;
|
||||||
|
@ -4490,6 +4505,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
||||||
Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
|
Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
|
||||||
.setImm(Op.second);
|
.setImm(Op.second);
|
||||||
}
|
}
|
||||||
|
// Add EFLAGS to block live-ins between CmpBB and block of flags producer.
|
||||||
|
for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
|
||||||
|
MBB = *MBB->pred_begin()) {
|
||||||
|
assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
|
||||||
|
if (!MBB->isLiveIn(X86::EFLAGS))
|
||||||
|
MBB->addLiveIn(X86::EFLAGS);
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -130,20 +130,21 @@ define i32 @func_m(i32 %a, i32 %b) nounwind {
|
||||||
ret i32 %cond
|
ret i32 %cond
|
||||||
}
|
}
|
||||||
|
|
||||||
; If EFLAGS is live-out, we can't remove cmp if there exists
|
; (This used to test that an unsafe removal of cmp in bb.0 is not happening,
|
||||||
; a swapped sub.
|
; but now we can do so safely).
|
||||||
define i32 @func_l2(i32 %a, i32 %b) nounwind {
|
define i32 @func_l2(i32 %a, i32 %b) nounwind {
|
||||||
; CHECK-LABEL: func_l2:
|
; CHECK-LABEL: func_l2:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: movl %ecx, %eax
|
; CHECK-NEXT: movl %eax, %ecx
|
||||||
; CHECK-NEXT: subl %edx, %eax
|
; CHECK-NEXT: subl %edx, %ecx
|
||||||
; CHECK-NEXT: jne .LBB8_2
|
; CHECK-NEXT: jne .LBB8_2
|
||||||
; CHECK-NEXT: # %bb.1: # %if.then
|
; CHECK-NEXT: # %bb.1: # %if.then
|
||||||
; CHECK-NEXT: cmpl %ecx, %edx
|
; CHECK-NEXT: cmovll %ecx, %eax
|
||||||
; CHECK-NEXT: cmovlel %ecx, %eax
|
; CHECK-NEXT: retl
|
||||||
; CHECK-NEXT: .LBB8_2: # %if.else
|
; CHECK-NEXT: .LBB8_2: # %if.else
|
||||||
|
; CHECK-NEXT: movl %ecx, %eax
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
%cmp = icmp eq i32 %b, %a
|
%cmp = icmp eq i32 %b, %a
|
||||||
%sub = sub nsw i32 %a, %b
|
%sub = sub nsw i32 %a, %b
|
||||||
|
|
|
@ -32,6 +32,108 @@ body: |
|
||||||
$al = SETCCr 4, implicit $eflags
|
$al = SETCCr 4, implicit $eflags
|
||||||
...
|
...
|
||||||
---
|
---
|
||||||
|
name: opt_multiple_blocks
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
; CHECK-LABEL: name: opt_multiple_blocks
|
||||||
|
; CHECK: bb.0:
|
||||||
|
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
|
||||||
|
; CHECK-NEXT: [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
|
||||||
|
; CHECK-NEXT: PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
|
||||||
|
; CHECK-NEXT: $rcx = POP64r implicit-def $rsp, implicit $rsp
|
||||||
|
; CHECK-NEXT: JMP_1 %bb.1
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: bb.1:
|
||||||
|
; CHECK-NEXT: liveins: $eflags
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
|
||||||
|
; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
|
||||||
|
bb.0:
|
||||||
|
%0:gr64 = COPY undef $rsi
|
||||||
|
%1:gr64 = INC64r %0, implicit-def $eflags
|
||||||
|
PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
|
||||||
|
$rcx = POP64r implicit-def $rsp, implicit $rsp
|
||||||
|
JMP_1 %bb.1
|
||||||
|
|
||||||
|
bb.1:
|
||||||
|
; TEST should be removed.
|
||||||
|
TEST64rr %1, %1, implicit-def $eflags
|
||||||
|
%2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
|
||||||
|
$al = SETCCr 4, implicit $eflags
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: opt_multiple_blocks_noopt_0
|
||||||
|
body: |
|
||||||
|
; CHECK-LABEL: name: opt_multiple_blocks_noopt_0
|
||||||
|
; CHECK: bb.0:
|
||||||
|
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
|
||||||
|
; CHECK-NEXT: [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
|
||||||
|
; CHECK-NEXT: JMP_1 %bb.1
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: bb.1:
|
||||||
|
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: TEST64rr [[INC64r]], [[INC64r]], implicit-def $eflags
|
||||||
|
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
|
||||||
|
; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
|
||||||
|
; CHECK-NEXT: JCC_1 %bb.1, 2, implicit $eflags
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: bb.2:
|
||||||
|
bb.0:
|
||||||
|
%0:gr64 = COPY undef $rsi
|
||||||
|
%1:gr64 = INC64r %0, implicit-def $eflags
|
||||||
|
JMP_1 %bb.1
|
||||||
|
|
||||||
|
bb.1:
|
||||||
|
; The TEST64rr should not be removed, since there are multiple preds.
|
||||||
|
TEST64rr %1, %1, implicit-def $eflags
|
||||||
|
%2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
|
||||||
|
$al = SETCCr 4, implicit $eflags
|
||||||
|
JCC_1 %bb.1, 2, implicit $eflags
|
||||||
|
|
||||||
|
bb.2:
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: opt_multiple_blocks_noopt_1
|
||||||
|
body: |
|
||||||
|
; CHECK-LABEL: name: opt_multiple_blocks_noopt_1
|
||||||
|
; CHECK: bb.0:
|
||||||
|
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: JMP_1 %bb.1
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: bb.1:
|
||||||
|
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
|
||||||
|
; CHECK-NEXT: TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
|
||||||
|
; CHECK-NEXT: JCC_1 %bb.1, 2, implicit $eflags
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: bb.2:
|
||||||
|
; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
|
||||||
|
; CHECK-NEXT: TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
|
||||||
|
; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
|
||||||
|
bb.0:
|
||||||
|
JMP_1 %bb.1
|
||||||
|
|
||||||
|
bb.1:
|
||||||
|
%0:gr64 = COPY undef $rsi
|
||||||
|
TEST64rr %0, %0, implicit-def $eflags
|
||||||
|
JCC_1 %bb.1, 2, implicit $eflags
|
||||||
|
|
||||||
|
bb.2:
|
||||||
|
; We should not move MOV32r0 up into the loop (that would be correct but
|
||||||
|
; slow).
|
||||||
|
%1:gr32 = MOV32r0 implicit-def $eflags
|
||||||
|
; TEST should not be removed because of MOV32r0.
|
||||||
|
TEST64rr %0, %0, implicit-def $eflags
|
||||||
|
$al = SETCCr 4, implicit $eflags
|
||||||
|
...
|
||||||
|
---
|
||||||
name: opt_zerocmp_user_0
|
name: opt_zerocmp_user_0
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
|
|
Loading…
Reference in New Issue