forked from OSchip/llvm-project
[x86/SLH] Remove complex SHRX-based post-load hardening.
This code was really nasty, had several bugs in it originally, and wasn't carrying its weight. While on Zen we have all 4 ports available for SHRX, on all of the Intel parts with Agner's tables, SHRX can only execute on 2 ports, giving it 1/2 the throughput of OR. Worse, all too often this pattern required two SHRX instructions in a chain, hurting the critical path by a lot. Even if we end up needing to safe/restore EFLAGS, that is no longer so bad. We pay for a uop to save the flag, but we very likely get fusion when it is used by forming a test/jCC pair or something similar. In practice, I don't expect the SHRX to be a significant savings here, so I'd like to avoid the complex code required. We can always resurrect this if/when someone has a specific performance issue addressed by it. llvm-svn: 337781
This commit is contained in:
parent
7adcf292a1
commit
b46c22de00
|
@ -1907,81 +1907,18 @@ void X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
|
|||
|
||||
auto InsertPt = std::next(MI.getIterator());
|
||||
unsigned FlagsReg = 0;
|
||||
bool EFLAGSLive = isEFLAGSLive(MBB, InsertPt, *TRI);
|
||||
if (EFLAGSLive && !Subtarget->hasBMI2()) {
|
||||
if (isEFLAGSLive(MBB, InsertPt, *TRI))
|
||||
FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
|
||||
EFLAGSLive = false;
|
||||
}
|
||||
|
||||
if (!EFLAGSLive) {
|
||||
unsigned StateReg = GetStateRegInRC(*DefRC);
|
||||
unsigned NewDefReg = MRI->createVirtualRegister(DefRC);
|
||||
DefOp.setReg(NewDefReg);
|
||||
auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), OldDefReg)
|
||||
.addReg(StateReg)
|
||||
.addReg(NewDefReg);
|
||||
OrI->addRegisterDead(X86::EFLAGS, TRI);
|
||||
++NumInstsInserted;
|
||||
LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
|
||||
} else {
|
||||
assert(Subtarget->hasBMI2() &&
|
||||
"Cannot harden loads and preserve EFLAGS without BMI2!");
|
||||
|
||||
unsigned ShiftOpCode = DefRegBytes < 4 ? X86::SHRX32rr : X86::SHRX64rr;
|
||||
auto &ShiftRC =
|
||||
DefRegBytes < 4 ? X86::GR32_NOSPRegClass : X86::GR64_NOSPRegClass;
|
||||
int ShiftRegBytes = TRI->getRegSizeInBits(ShiftRC) / 8;
|
||||
unsigned DefSubRegImm = SubRegImms[Log2_32(DefRegBytes)];
|
||||
|
||||
unsigned StateReg = GetStateRegInRC(ShiftRC);
|
||||
|
||||
// First have the def instruction def a temporary register.
|
||||
unsigned TmpReg = MRI->createVirtualRegister(DefRC);
|
||||
DefOp.setReg(TmpReg);
|
||||
// Now copy it into a register of the shift RC.
|
||||
unsigned ShiftInputReg = TmpReg;
|
||||
if (DefRegBytes != ShiftRegBytes) {
|
||||
unsigned UndefReg = MRI->createVirtualRegister(&ShiftRC);
|
||||
BuildMI(MBB, InsertPt, Loc, TII->get(X86::IMPLICIT_DEF), UndefReg);
|
||||
ShiftInputReg = MRI->createVirtualRegister(&ShiftRC);
|
||||
BuildMI(MBB, InsertPt, Loc, TII->get(X86::INSERT_SUBREG), ShiftInputReg)
|
||||
.addReg(UndefReg)
|
||||
.addReg(TmpReg)
|
||||
.addImm(DefSubRegImm);
|
||||
}
|
||||
|
||||
// We shift this once if the shift is wider than the def and thus we can
|
||||
// shift *all* of the def'ed bytes out. Otherwise we need to do two shifts.
|
||||
|
||||
unsigned ShiftedReg = MRI->createVirtualRegister(&ShiftRC);
|
||||
auto Shift1I =
|
||||
BuildMI(MBB, InsertPt, Loc, TII->get(ShiftOpCode), ShiftedReg)
|
||||
.addReg(ShiftInputReg)
|
||||
.addReg(StateReg);
|
||||
(void)Shift1I;
|
||||
++NumInstsInserted;
|
||||
LLVM_DEBUG(dbgs() << " Inserting shrx: "; Shift1I->dump(); dbgs() << "\n");
|
||||
|
||||
// The only way we have a bit left is if all 8 bytes were defined. Do an
|
||||
// extra shift to get the last bit in this case.
|
||||
if (DefRegBytes == ShiftRegBytes) {
|
||||
// We can just directly def the old def register as its the same size.
|
||||
ShiftInputReg = ShiftedReg;
|
||||
auto Shift2I =
|
||||
BuildMI(MBB, InsertPt, Loc, TII->get(ShiftOpCode), OldDefReg)
|
||||
.addReg(ShiftInputReg)
|
||||
.addReg(StateReg);
|
||||
(void)Shift2I;
|
||||
++NumInstsInserted;
|
||||
LLVM_DEBUG(dbgs() << " Inserting shrx: "; Shift2I->dump();
|
||||
dbgs() << "\n");
|
||||
} else {
|
||||
// When we have different size shift register we need to fix up the
|
||||
// class. We can do that as we copy into the old def register.
|
||||
BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), OldDefReg)
|
||||
.addReg(ShiftedReg, 0, DefSubRegImm);
|
||||
}
|
||||
}
|
||||
unsigned StateReg = GetStateRegInRC(*DefRC);
|
||||
unsigned NewDefReg = MRI->createVirtualRegister(DefRC);
|
||||
DefOp.setReg(NewDefReg);
|
||||
auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), OldDefReg)
|
||||
.addReg(StateReg)
|
||||
.addReg(NewDefReg);
|
||||
OrI->addRegisterDead(X86::EFLAGS, TRI);
|
||||
++NumInstsInserted;
|
||||
LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
|
||||
|
||||
if (FlagsReg)
|
||||
restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
|
||||
|
|
Loading…
Reference in New Issue