forked from OSchip/llvm-project
[X86] Handle SETB_C32r/SETB_C64r in flag copy lowering the same way we handle SBB
Previously we took the restored flag in a GPR and extended it to 32 or 64 bits, then used it as an input to a subtraction from 0. This required creating both a zero extend and a zero. This patch changes the lowering to just use an ADD with 255 to restore the carry flag and keep the SETB_C32r/SETB_C64r, exactly like we handle SBB, which is what SETB becomes. Differential Revision: https://reviews.llvm.org/D74152
This commit is contained in:
parent
13f8be68e0
commit
278578744a
|
@ -124,10 +124,6 @@ private:
|
||||||
MachineInstr &JmpI, CondRegArray &CondRegs);
|
MachineInstr &JmpI, CondRegArray &CondRegs);
|
||||||
void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
|
void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
|
||||||
MachineInstr &CopyDefI);
|
MachineInstr &CopyDefI);
|
||||||
void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
|
|
||||||
MachineBasicBlock::iterator TestPos,
|
|
||||||
DebugLoc TestLoc, MachineInstr &SetBI,
|
|
||||||
MachineOperand &FlagUse, CondRegArray &CondRegs);
|
|
||||||
void rewriteSetCC(MachineBasicBlock &TestMBB,
|
void rewriteSetCC(MachineBasicBlock &TestMBB,
|
||||||
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
|
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
|
||||||
MachineInstr &SetCCI, MachineOperand &FlagUse,
|
MachineInstr &SetCCI, MachineOperand &FlagUse,
|
||||||
|
@ -165,6 +161,7 @@ enum class FlagArithMnemonic {
|
||||||
RCL,
|
RCL,
|
||||||
RCR,
|
RCR,
|
||||||
SBB,
|
SBB,
|
||||||
|
SETB,
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
@ -235,6 +232,10 @@ static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
|
||||||
case X86::ADOX32rm:
|
case X86::ADOX32rm:
|
||||||
case X86::ADOX64rm:
|
case X86::ADOX64rm:
|
||||||
return FlagArithMnemonic::ADOX;
|
return FlagArithMnemonic::ADOX;
|
||||||
|
|
||||||
|
case X86::SETB_C32r:
|
||||||
|
case X86::SETB_C64r:
|
||||||
|
return FlagArithMnemonic::SETB;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -638,22 +639,9 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
|
||||||
// logic.
|
// logic.
|
||||||
FlagsKilled = true;
|
FlagsKilled = true;
|
||||||
|
|
||||||
switch (MI.getOpcode()) {
|
// Generically handle remaining uses as arithmetic instructions.
|
||||||
case X86::SETB_C32r:
|
rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
|
||||||
case X86::SETB_C64r:
|
CondRegs);
|
||||||
// Use custom lowering for arithmetic that is merely extending the
|
|
||||||
// carry flag. We model this as the SETB_C* pseudo instructions.
|
|
||||||
rewriteSetCarryExtended(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
|
|
||||||
CondRegs);
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
// Generically handle remaining uses as arithmetic instructions.
|
|
||||||
rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
|
|
||||||
CondRegs);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this was the last use of the flags, we're done.
|
// If this was the last use of the flags, we're done.
|
||||||
|
@ -819,6 +807,7 @@ void X86FlagsCopyLoweringPass::rewriteArithmetic(
|
||||||
case FlagArithMnemonic::RCL:
|
case FlagArithMnemonic::RCL:
|
||||||
case FlagArithMnemonic::RCR:
|
case FlagArithMnemonic::RCR:
|
||||||
case FlagArithMnemonic::SBB:
|
case FlagArithMnemonic::SBB:
|
||||||
|
case FlagArithMnemonic::SETB:
|
||||||
Cond = X86::COND_B; // CF == 1
|
Cond = X86::COND_B; // CF == 1
|
||||||
// Set up an addend that when one is added will need a carry due to not
|
// Set up an addend that when one is added will need a carry due to not
|
||||||
// having a higher bit available.
|
// having a higher bit available.
|
||||||
|
@ -957,115 +946,6 @@ void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
|
||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
}
|
}
|
||||||
|
|
||||||
void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
|
|
||||||
MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
|
|
||||||
DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
|
|
||||||
CondRegArray &CondRegs) {
|
|
||||||
// This routine is only used to handle pseudos for setting a register to zero
|
|
||||||
// or all ones based on CF. This is essentially the sign extended from 1-bit
|
|
||||||
// form of SETB and modeled with the SETB_C* pseudos. They require special
|
|
||||||
// handling as they aren't normal SETcc instructions and are lowered to an
|
|
||||||
// EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
|
|
||||||
// they are only provided in reg-defining forms. A complicating factor is that
|
|
||||||
// they can define many different register widths.
|
|
||||||
assert(SetBI.getOperand(0).isReg() &&
|
|
||||||
"Cannot have a non-register defined operand to this variant of SETB!");
|
|
||||||
|
|
||||||
// Little helper to do the common final step of replacing the register def'ed
|
|
||||||
// by this SETB instruction with a new register and removing the SETB
|
|
||||||
// instruction.
|
|
||||||
auto RewriteToReg = [&](unsigned Reg) {
|
|
||||||
MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
|
|
||||||
SetBI.eraseFromParent();
|
|
||||||
};
|
|
||||||
|
|
||||||
// Grab the register class used for this particular instruction.
|
|
||||||
auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
|
|
||||||
|
|
||||||
MachineBasicBlock &MBB = *SetBI.getParent();
|
|
||||||
auto SetPos = SetBI.getIterator();
|
|
||||||
auto SetLoc = SetBI.getDebugLoc();
|
|
||||||
|
|
||||||
auto AdjustReg = [&](unsigned Reg) {
|
|
||||||
auto &OrigRC = *MRI->getRegClass(Reg);
|
|
||||||
if (&OrigRC == &SetBRC)
|
|
||||||
return Reg;
|
|
||||||
|
|
||||||
unsigned NewReg;
|
|
||||||
|
|
||||||
int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
|
|
||||||
int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
|
|
||||||
assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
|
|
||||||
assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
|
|
||||||
int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
|
|
||||||
X86::NoSubRegister, X86::sub_32bit};
|
|
||||||
|
|
||||||
// If the original size is smaller than the target *and* is smaller than 4
|
|
||||||
// bytes, we need to explicitly zero extend it. We always extend to 4-bytes
|
|
||||||
// to maximize the chance of being able to CSE that operation and to avoid
|
|
||||||
// partial dependency stalls extending to 2-bytes.
|
|
||||||
if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
|
|
||||||
NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
|
|
||||||
BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
|
|
||||||
.addReg(Reg);
|
|
||||||
if (&SetBRC == &X86::GR32RegClass)
|
|
||||||
return NewReg;
|
|
||||||
Reg = NewReg;
|
|
||||||
OrigRegSize = 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
NewReg = MRI->createVirtualRegister(&SetBRC);
|
|
||||||
if (OrigRegSize < TargetRegSize) {
|
|
||||||
BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
|
|
||||||
NewReg)
|
|
||||||
.addImm(0)
|
|
||||||
.addReg(Reg)
|
|
||||||
.addImm(SubRegIdx[OrigRegSize]);
|
|
||||||
} else if (OrigRegSize > TargetRegSize) {
|
|
||||||
if (TargetRegSize == 1 && !Subtarget->is64Bit()) {
|
|
||||||
// Need to constrain the register class.
|
|
||||||
MRI->constrainRegClass(Reg, &X86::GR32_ABCDRegClass);
|
|
||||||
}
|
|
||||||
|
|
||||||
BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY),
|
|
||||||
NewReg)
|
|
||||||
.addReg(Reg, 0, SubRegIdx[TargetRegSize]);
|
|
||||||
} else {
|
|
||||||
BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
|
|
||||||
.addReg(Reg);
|
|
||||||
}
|
|
||||||
return NewReg;
|
|
||||||
};
|
|
||||||
|
|
||||||
unsigned &CondReg = CondRegs[X86::COND_B];
|
|
||||||
if (!CondReg)
|
|
||||||
CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
|
|
||||||
|
|
||||||
// Adjust the condition to have the desired register width by zero-extending
|
|
||||||
// as needed.
|
|
||||||
// FIXME: We should use a better API to avoid the local reference and using a
|
|
||||||
// different variable here.
|
|
||||||
unsigned ExtCondReg = AdjustReg(CondReg);
|
|
||||||
|
|
||||||
// Now we need to turn this into a bitmask. We do this by subtracting it from
|
|
||||||
// zero.
|
|
||||||
Register ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
|
|
||||||
BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
|
|
||||||
ZeroReg = AdjustReg(ZeroReg);
|
|
||||||
|
|
||||||
unsigned Sub;
|
|
||||||
switch (SetBI.getOpcode()) {
|
|
||||||
default: llvm_unreachable("Invalid SETB_C* opcode!");
|
|
||||||
case X86::SETB_C32r: Sub = X86::SUB32rr; break;
|
|
||||||
case X86::SETB_C64r: Sub = X86::SUB64rr; break;
|
|
||||||
}
|
|
||||||
Register ResultReg = MRI->createVirtualRegister(&SetBRC);
|
|
||||||
BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
|
|
||||||
.addReg(ZeroReg)
|
|
||||||
.addReg(ExtCondReg);
|
|
||||||
return RewriteToReg(ResultReg);
|
|
||||||
}
|
|
||||||
|
|
||||||
void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
|
void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
|
||||||
MachineBasicBlock::iterator TestPos,
|
MachineBasicBlock::iterator TestPos,
|
||||||
DebugLoc TestLoc,
|
DebugLoc TestLoc,
|
||||||
|
|
|
@ -544,21 +544,17 @@ body: |
|
||||||
%4:gr32 = SETB_C32r implicit-def $eflags, implicit $eflags
|
%4:gr32 = SETB_C32r implicit-def $eflags, implicit $eflags
|
||||||
MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %4
|
MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %4
|
||||||
; CHECK-NOT: $eflags =
|
; CHECK-NOT: $eflags =
|
||||||
; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
|
; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
|
||||||
; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
|
; CHECK-NEXT: %[[SETB:[^:]*]]:gr32 = SETB_C32r implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
|
||||||
; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]]
|
; CHECK-NEXT: MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %[[SETB]]
|
||||||
; CHECK-NEXT: MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
|
|
||||||
|
|
||||||
$eflags = COPY %3
|
$eflags = COPY %3
|
||||||
%5:gr64 = SETB_C64r implicit-def $eflags, implicit $eflags
|
%5:gr64 = SETB_C64r implicit-def $eflags, implicit $eflags
|
||||||
MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
|
MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
|
||||||
; CHECK-NOT: $eflags =
|
; CHECK-NOT: $eflags =
|
||||||
; CHECK: %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
|
; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
|
||||||
; CHECK-NEXT: %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit
|
; CHECK-NEXT: %[[SETB:[^:]*]]:gr64 = SETB_C64r implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
|
||||||
; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
|
; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %[[SETB]]
|
||||||
; CHECK-NEXT: %[[ZERO_EXT:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[ZERO]], %subreg.sub_32bit
|
|
||||||
; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr64 = SUB64rr %[[ZERO_EXT]], %[[CF_EXT2]]
|
|
||||||
; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
|
|
||||||
|
|
||||||
RET 0
|
RET 0
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue