[X86] Handle SETB_C32r/SETB_C64r in flag copy lowering the same way we handle SBB

Previously we took the restored flag in a GPR, extended it to 32 or 64 bits, and then used it as an input to a subtraction from 0. This requires creating a zero extend and materializing a 0. This patch instead uses an ADD with 255 to restore the carry flag and keeps the SETB_C32r/SETB_C64r, exactly as we handle SBB, which is what SETB becomes. Differential Revision: https://reviews.llvm.org/D74152
2020-02-07 10:18:01 -08:00 · 2020-02-07 10:18:01 -08:00 · 278578744a
parent 13f8be68e0
commit 278578744a
2 changed files with 15 additions and 139 deletions
--- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@ -124,10 +124,6 @@ private:
                      MachineInstr &JmpI, CondRegArray &CondRegs);
  void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
                   MachineInstr &CopyDefI);
  void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
                               MachineBasicBlock::iterator TestPos,
                               DebugLoc TestLoc, MachineInstr &SetBI,
                               MachineOperand &FlagUse, CondRegArray &CondRegs);
  void rewriteSetCC(MachineBasicBlock &TestMBB,
                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                    MachineInstr &SetCCI, MachineOperand &FlagUse,
@ -165,6 +161,7 @@ enum class FlagArithMnemonic {
  RCL,
  RCR,
  SBB,
  SETB,
 };
 } // namespace
@ -235,6 +232,10 @@ static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
  case X86::ADOX32rm:
  case X86::ADOX64rm:
    return FlagArithMnemonic::ADOX;
  case X86::SETB_C32r:
  case X86::SETB_C64r:
    return FlagArithMnemonic::SETB;
  }
 }
@ -638,22 +639,9 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
          // logic.
          FlagsKilled = true;
-          switch (MI.getOpcode()) {
+          // Generically handle remaining uses as arithmetic instructions.
-          case X86::SETB_C32r:
+          rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
-          case X86::SETB_C64r:
+                            CondRegs);
            // Use custom lowering for arithmetic that is merely extending the
            // carry flag. We model this as the SETB_C* pseudo instructions.
            rewriteSetCarryExtended(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
                                    CondRegs);
            break;
          default:
            // Generically handle remaining uses as arithmetic instructions.
            rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
                              CondRegs);
            break;
          }
          break;
        }
        // If this was the last use of the flags, we're done.
@ -819,6 +807,7 @@ void X86FlagsCopyLoweringPass::rewriteArithmetic(
  case FlagArithMnemonic::RCL:
  case FlagArithMnemonic::RCR:
  case FlagArithMnemonic::SBB:
  case FlagArithMnemonic::SETB:
    Cond = X86::COND_B; // CF == 1
    // Set up an addend that when one is added will need a carry due to not
    // having a higher bit available.
@ -957,115 +946,6 @@ void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
  MI.eraseFromParent();
 }
 /// Lower a SETB_C32r/SETB_C64r pseudo into a subtraction of the saved carry
 /// condition from zero.
 ///
 /// The COND_B register is looked up in \p CondRegs (and created through
 /// promoteCondToReg at \p TestPos in \p TestMBB with \p TestLoc when the entry
 /// is still empty), adjusted to the register class defined by \p SetBI, and
 /// subtracted from a zero register of the same class. Every use of the
 /// register defined by \p SetBI is then redirected to the SUB result and
 /// \p SetBI is erased.
 ///
 /// \p FlagUse is not referenced anywhere in this body.
 void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
    CondRegArray &CondRegs) {
  // This routine is only used to handle pseudos for setting a register to zero
  // or all ones based on CF. This is essentially the sign extended from 1-bit
  // form of SETB and modeled with the SETB_C* pseudos. They require special
  // handling as they aren't normal SETcc instructions and are lowered to an
  // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
  // they are only provided in reg-defining forms. A complicating factor is that
  // they can define many different register widths.
  assert(SetBI.getOperand(0).isReg() &&
         "Cannot have a non-register defined operand to this variant of SETB!");
  // Little helper to do the common final step of replacing the register def'ed
  // by this SETB instruction with a new register and removing the SETB
  // instruction.
  auto RewriteToReg = [&](unsigned Reg) {
    MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
    SetBI.eraseFromParent();
  };
  // Grab the register class used for this particular instruction.
  auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
  // Every instruction built below is placed at the SETB's own position in its
  // own block and carries the SETB's debug location.
  MachineBasicBlock &MBB = *SetBI.getParent();
  auto SetPos = SetBI.getIterator();
  auto SetLoc = SetBI.getDebugLoc();
  // Helper returning a register of class SetBRC that carries Reg's value. It
  // returns Reg untouched when the classes already match; otherwise it emits
  // the zero-extension, SUBREG_TO_REG, or (sub-register) COPY needed.
  auto AdjustReg = [&](unsigned Reg) {
    auto &OrigRC = *MRI->getRegClass(Reg);
    if (&OrigRC == &SetBRC)
      return Reg;
    unsigned NewReg;
    // Both sizes are expressed in bytes from here on.
    int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
    int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
    assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
    assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
    // Sub-register index table indexed by a register size in bytes. Slots 0
    // and 3 are placeholders; only 1, 2 and 4 map to a real sub-register.
    int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
                       X86::NoSubRegister, X86::sub_32bit};
    // If the original size is smaller than the target *and* is smaller than 4
    // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
    // to maximize the chance of being able to CSE that operation and to avoid
    // partial dependency stalls extending to 2-bytes.
    // NOTE(review): a 2-byte Reg would also take this branch, yet the opcode
    // used is MOVZX32rr8 -- presumably only 8-bit registers ever get here;
    // confirm against callers.
    if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
      NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
      BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
          .addReg(Reg);
      // A GR32 target is satisfied directly by the zero-extended register.
      if (&SetBRC == &X86::GR32RegClass)
        return NewReg;
      // Otherwise continue below, treating the extended value as a 4-byte
      // source.
      Reg = NewReg;
      OrigRegSize = 4;
    }
    NewReg = MRI->createVirtualRegister(&SetBRC);
    if (OrigRegSize < TargetRegSize) {
      // Widening: place Reg into the matching sub-register of the new value.
      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
              NewReg)
          .addImm(0)
          .addReg(Reg)
          .addImm(SubRegIdx[OrigRegSize]);
    } else if (OrigRegSize > TargetRegSize) {
      // Narrowing: copy out the sub-register that has the target's size.
      if (TargetRegSize == 1 && !Subtarget->is64Bit()) {
        // Need to constrain the register class.
        // NOTE(review): presumably because only the A/B/C/D registers expose
        // an 8-bit sub-register outside 64-bit mode -- confirm.
        MRI->constrainRegClass(Reg, &X86::GR32_ABCDRegClass);
      }
      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY),
              NewReg)
          .addReg(Reg, 0, SubRegIdx[TargetRegSize]);
    } else {
      // Same size but a different register class: a plain COPY is emitted.
      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
          .addReg(Reg);
    }
    return NewReg;
  };
  // Bound by reference so that a register promoted here is also stored back
  // into CondRegs.
  unsigned &CondReg = CondRegs[X86::COND_B];
  if (!CondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
  // Adjust the condition to have the desired register width by zero-extending
  // as needed.
  // FIXME: We should use a better API to avoid the local reference and using a
  // different variable here.
  unsigned ExtCondReg = AdjustReg(CondReg);
  // Now we need to turn this into a bitmask. We do this by subtracting it from
  // zero.
  Register ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
  BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
  ZeroReg = AdjustReg(ZeroReg);
  // Pick the SUB opcode from the SETB_C* opcode being rewritten.
  unsigned Sub;
  switch (SetBI.getOpcode()) {
  default: llvm_unreachable("Invalid SETB_C* opcode!");
  case X86::SETB_C32r: Sub = X86::SUB32rr; break;
  case X86::SETB_C64r: Sub = X86::SUB64rr; break;
  }
  Register ResultReg = MRI->createVirtualRegister(&SetBRC);
  BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
      .addReg(ZeroReg)
      .addReg(ExtCondReg);
  // Redirect all uses of the SETB's def to the SUB result and erase the SETB.
  return RewriteToReg(ResultReg);
 }
 void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
                                            MachineBasicBlock::iterator TestPos,
                                            DebugLoc TestLoc,
--- a/llvm/test/CodeGen/X86/flags-copy-lowering.mir
+++ b/llvm/test/CodeGen/X86/flags-copy-lowering.mir
@ -544,21 +544,17 @@ body:             |
    %4:gr32 = SETB_C32r implicit-def $eflags, implicit $eflags
    MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %4
  ; CHECK-NOT:     $eflags =
-  ; CHECK:         %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+  ; CHECK:         dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
-  ; CHECK-NEXT:    %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
+  ; CHECK-NEXT:    %[[SETB:[^:]*]]:gr32 = SETB_C32r implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
-  ; CHECK-NEXT:    %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]]
+  ; CHECK-NEXT:    MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %[[SETB]]
  ; CHECK-NEXT:    MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
    $eflags = COPY %3
    %5:gr64 = SETB_C64r implicit-def $eflags, implicit $eflags
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
  ; CHECK-NOT:     $eflags =
-  ; CHECK:         %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+  ; CHECK:         dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
-  ; CHECK-NEXT:    %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit
+  ; CHECK-NEXT:    %[[SETB:[^:]*]]:gr64 = SETB_C64r implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
-  ; CHECK-NEXT:    %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
+  ; CHECK-NEXT:    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %[[SETB]]
  ; CHECK-NEXT:    %[[ZERO_EXT:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[ZERO]], %subreg.sub_32bit
  ; CHECK-NEXT:    %[[REPLACEMENT:[^:]*]]:gr64 = SUB64rr %[[ZERO_EXT]], %[[CF_EXT2]]
  ; CHECK-NEXT:    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
    RET 0