diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index cc2d425eadd9..9c1552ee9084 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4295,65 +4295,75 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
       std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
   MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
   assert(SrcRegDef && "Must have a definition (SSA)");
-  for (MachineInstr &Inst : make_range(From, CmpMBB.rend())) {
-    // Try to use EFLAGS from the instruction defining %SrcReg. Example:
-    //   %eax = addl ...
-    //   ...                // EFLAGS not changed
-    //   testl %eax, %eax   // <-- can be removed
-    if (&Inst == SrcRegDef) {
-      if (IsCmpZero && isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
-        MI = &Inst;
-        break;
-      }
-      // Cannot find other candidates before definition of SrcReg.
-      return false;
-    }
-
-    if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
-      // Try to use EFLAGS produced by an instruction reading %SrcReg. Example:
-      //   %eax = ...
-      //   ...
-      //   popcntl %eax
-      //   ...                 // EFLAGS not changed
-      //   testl %eax, %eax    // <-- can be removed
-      if (IsCmpZero) {
-        NewCC = isUseDefConvertible(Inst);
-        if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
-            Inst.getOperand(1).getReg() == SrcReg) {
-          ShouldUpdateCC = true;
+  for (MachineBasicBlock *MBB = &CmpMBB;;) {
+    for (MachineInstr &Inst : make_range(From, MBB->rend())) {
+      // Try to use EFLAGS from the instruction defining %SrcReg. Example:
+      //   %eax = addl ...
+      //   ...                // EFLAGS not changed
+      //   testl %eax, %eax   // <-- can be removed
+      if (&Inst == SrcRegDef) {
+        if (IsCmpZero &&
+            isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
           MI = &Inst;
           break;
         }
+        // Cannot find other candidates before definition of SrcReg.
+        return false;
       }
 
-      // Try to use EFLAGS from an instruction with similar flag results.
-      // Example:
-      //   sub x, y  or  cmp x, y
-      //   ...           // EFLAGS not changed
-      //   cmp x, y      // <-- can be removed
-      if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
-                                             CmpValue, Inst)) {
-        Sub = &Inst;
-        break;
-      }
+      if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
+        // Try to use EFLAGS produced by an instruction reading %SrcReg.
+        // Example:
+        //   %eax = ...
+        //   ...
+        //   popcntl %eax
+        //   ...                 // EFLAGS not changed
+        //   testl %eax, %eax    // <-- can be removed
+        if (IsCmpZero) {
+          NewCC = isUseDefConvertible(Inst);
+          if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
+              Inst.getOperand(1).getReg() == SrcReg) {
+            ShouldUpdateCC = true;
+            MI = &Inst;
+            break;
+          }
+        }
 
-      // MOV32r0 is implemented with xor which clobbers condition code. It is
-      // safe to move up, if the definition to EFLAGS is dead and earlier
-      // instructions do not read or write EFLAGS.
-      if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
-          Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
-        Movr0Inst = &Inst;
-        continue;
-      }
+        // Try to use EFLAGS from an instruction with similar flag results.
+        // Example:
+        //   sub x, y  or  cmp x, y
+        //   ...           // EFLAGS not changed
+        //   cmp x, y      // <-- can be removed
+        if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2,
+                                               CmpMask, CmpValue, Inst)) {
+          Sub = &Inst;
+          break;
+        }
 
-      // Cannot do anything for any other EFLAG changes.
-      return false;
+        // MOV32r0 is implemented with xor which clobbers condition code. It is
+        // safe to move up, if the definition to EFLAGS is dead and earlier
+        // instructions do not read or write EFLAGS.
+        if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
+            Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
+          Movr0Inst = &Inst;
+          continue;
+        }
+
+        // Cannot do anything for any other EFLAG changes.
+        return false;
+      }
     }
-  }
 
-  // Return false if no candidates exist.
-  if (!MI && !Sub)
-    return false;
+    if (MI || Sub)
+      break;
+
+    // Reached the beginning of the basic block. Continue in the predecessor
+    // if there is exactly one.
+    if (MBB->pred_size() != 1)
+      return false;
+    MBB = *MBB->pred_begin();
+    From = MBB->rbegin();
+  }
 
   bool IsSwapped =
       (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
@@ -4459,8 +4469,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
 
   // The instruction to be updated is either Sub or MI.
   Sub = IsCmpZero ? MI : Sub;
+  MachineBasicBlock *SubBB = Sub->getParent();
   // Move Movr0Inst to the appropriate place before Sub.
   if (Movr0Inst) {
+    // Only move within the same block so we don't accidentally move to a
+    // block with higher execution frequency.
+    if (&CmpMBB != SubBB)
+      return false;
     // Look backwards until we find a def that doesn't use the current EFLAGS.
     MachineBasicBlock::reverse_iterator InsertI = Sub,
                                         InsertE = Sub->getParent()->rend();
@@ -4468,7 +4483,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
       MachineInstr *Instr = &*InsertI;
       if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
           Instr->modifiesRegister(X86::EFLAGS, TRI)) {
-        Sub->getParent()->remove(Movr0Inst);
+        Movr0Inst->getParent()->remove(Movr0Inst);
         Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
                                    Movr0Inst);
         break;
@@ -4490,6 +4505,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
         .setImm(Op.second);
   }
+  // Add EFLAGS to block live-ins between CmpMBB and block of flags producer.
+  for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
+       MBB = *MBB->pred_begin()) {
+    assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
+    if (!MBB->isLiveIn(X86::EFLAGS))
+      MBB->addLiveIn(X86::EFLAGS);
+  }
   return true;
 }
 
diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index b436b1f35c8f..848ebc97a1ac 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -130,20 +130,21 @@ define i32 @func_m(i32 %a, i32 %b) nounwind {
   ret i32 %cond
 }
 
-; If EFLAGS is live-out, we can't remove cmp if there exists
-; a swapped sub.
+; (This used to test that an unsafe removal of the cmp in bb.0 did not happen;
+; the removal can now be done safely.)
 define i32 @func_l2(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: func_l2:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: subl %edx, %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: subl %edx, %ecx
 ; CHECK-NEXT: jne .LBB8_2
 ; CHECK-NEXT: # %bb.1: # %if.then
-; CHECK-NEXT: cmpl %ecx, %edx
-; CHECK-NEXT: cmovlel %ecx, %eax
+; CHECK-NEXT: cmovll %ecx, %eax
+; CHECK-NEXT: retl
 ; CHECK-NEXT: .LBB8_2: # %if.else
+; CHECK-NEXT: movl %ecx, %eax
 ; CHECK-NEXT: retl
   %cmp = icmp eq i32 %b, %a
   %sub = sub nsw i32 %a, %b
diff --git a/llvm/test/CodeGen/X86/optimize-compare.mir b/llvm/test/CodeGen/X86/optimize-compare.mir
index 8a9c6504698b..5b8ca4c43392 100644
--- a/llvm/test/CodeGen/X86/optimize-compare.mir
+++ b/llvm/test/CodeGen/X86/optimize-compare.mir
@@ -32,6 +32,108 @@ body: |
     $al = SETCCr 4, implicit $eflags
 ...
 ---
+name: opt_multiple_blocks
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: opt_multiple_blocks
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
+  ; CHECK-NEXT: [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
+  ; CHECK-NEXT: PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
+  ; CHECK-NEXT: $rcx = POP64r implicit-def $rsp, implicit $rsp
+  ; CHECK-NEXT: JMP_1 %bb.1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: liveins: $eflags
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
+  ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
+  bb.0:
+    %0:gr64 = COPY undef $rsi
+    %1:gr64 = INC64r %0, implicit-def $eflags
+    PUSH64r undef $rcx, implicit-def $rsp, implicit $rsp
+    $rcx = POP64r implicit-def $rsp, implicit $rsp
+    JMP_1 %bb.1
+
+  bb.1:
+    ; TEST should be removed.
+    TEST64rr %1, %1, implicit-def $eflags
+    %2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
+    $al = SETCCr 4, implicit $eflags
+...
+---
+name: opt_multiple_blocks_noopt_0
+body: |
+  ; CHECK-LABEL: name: opt_multiple_blocks_noopt_0
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
+  ; CHECK-NEXT: [[INC64r:%[0-9]+]]:gr64 = INC64r [[COPY]], implicit-def $eflags
+  ; CHECK-NEXT: JMP_1 %bb.1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: TEST64rr [[INC64r]], [[INC64r]], implicit-def $eflags
+  ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[INC64r]], 5, $noreg, 12, $noreg
+  ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
+  ; CHECK-NEXT: JCC_1 %bb.1, 2, implicit $eflags
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  bb.0:
+    %0:gr64 = COPY undef $rsi
+    %1:gr64 = INC64r %0, implicit-def $eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    ; The TEST64rr should not be removed, since there are multiple preds.
+    TEST64rr %1, %1, implicit-def $eflags
+    %2:gr64 = LEA64r %1, 5, $noreg, 12, $noreg
+    $al = SETCCr 4, implicit $eflags
+    JCC_1 %bb.1, 2, implicit $eflags
+
+  bb.2:
+...
+---
+name: opt_multiple_blocks_noopt_1
+body: |
+  ; CHECK-LABEL: name: opt_multiple_blocks_noopt_1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: JMP_1 %bb.1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY undef $rsi
+  ; CHECK-NEXT: TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
+  ; CHECK-NEXT: JCC_1 %bb.1, 2, implicit $eflags
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
+  ; CHECK-NEXT: TEST64rr [[COPY]], [[COPY]], implicit-def $eflags
+  ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags
+  bb.0:
+    JMP_1 %bb.1
+
+  bb.1:
+    %0:gr64 = COPY undef $rsi
+    TEST64rr %0, %0, implicit-def $eflags
+    JCC_1 %bb.1, 2, implicit $eflags
+
+  bb.2:
+    ; We should not move MOV32r0 up into the loop (that would be correct but
+    ; slow).
+    %1:gr32 = MOV32r0 implicit-def $eflags
+    ; TEST should not be removed because of MOV32r0.
+    TEST64rr %0, %0, implicit-def $eflags
+    $al = SETCCr 4, implicit $eflags
+...
+---
 name: opt_zerocmp_user_0
 body: |
   bb.0: