//===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// A pre-emit peephole for catching opportunities introduced by late passes such
// as MachineBlockPlacement.
//
//===----------------------------------------------------------------------===//

#include "PPC.h"
|
|
|
|
#include "PPCInstrInfo.h"
|
|
|
|
#include "PPCSubtarget.h"
|
|
|
|
#include "llvm/ADT/DenseMap.h"
|
|
|
|
#include "llvm/ADT/Statistic.h"
|
|
|
|
#include "llvm/CodeGen/LivePhysRegs.h"
|
2018-09-26 20:32:45 +08:00
|
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
2017-12-15 15:27:53 +08:00
|
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
[PowerPC] Add linker opt for PC Relative GOT indirect accesses
A linker optimization is available on PowerPC for GOT indirect PCRelative loads.
The idea is that we can mark a usual GOT indirect load:
pld 3, vec@got@pcrel(0), 1
lwa 3, 4(3)
With a relocation to say that if we don't need to go through the GOT we can let
the linker further optimize this and replace a load with a nop.
pld 3, vec@got@pcrel(0), 1
.Lpcrel1:
.reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
lwa 3, 4(3)
This patch adds the logic that allows the compiler to add the R_PPC64_PCREL_OPT.
Reviewers: nemanjai, lei, hfinkel, sfertile, efriedma, tstellar, grosbach
Reviewed By: nemanjai
Differential Revision: https://reviews.llvm.org/D79864
2020-07-22 03:29:54 +08:00
|
|
|
#include "llvm/MC/MCContext.h"
|
2017-12-15 15:27:53 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
|
|
|
|
using namespace llvm;

#define DEBUG_TYPE "ppc-pre-emit-peephole"

STATISTIC(NumRRConvertedInPreEmit,
          "Number of r+r instructions converted to r+i in pre-emit peephole");
STATISTIC(NumRemovedInPreEmit,
          "Number of instructions deleted in pre-emit peephole");
STATISTIC(NumberOfSelfCopies,
          "Number of self copy instructions eliminated");
STATISTIC(NumFrameOffFoldInPreEmit,
          "Number of folding frame offset by using r+r in pre-emit peephole");

static cl::opt<bool>
EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
                     cl::desc("enable PC Relative linker optimization"));

static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
                   cl::desc("Run pre-emit peephole optimizations."));

namespace {

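// Returns true if this load/store opcode has an equivalent PC-relative
// (prefixed) form and can therefore take part in the GOT indirect
// PC-relative linker optimization implemented in addLinkerOpt() below.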
static bool hasPCRelativeForm(MachineInstr &Use) {
  switch (Use.getOpcode()) {
  default:
    return false;
  case PPC::LBZ:
  case PPC::LBZ8:
  case PPC::LHA:
  case PPC::LHA8:
  case PPC::LHZ:
  case PPC::LHZ8:
  case PPC::LWZ:
  case PPC::LWZ8:
  case PPC::STB:
  case PPC::STB8:
  case PPC::STH:
  case PPC::STH8:
  case PPC::STW:
  case PPC::STW8:
  case PPC::LD:
  case PPC::STD:
  case PPC::LWA:
  case PPC::LXSD:
  case PPC::LXSSP:
  case PPC::LXV:
  case PPC::STXSD:
  case PPC::STXSSP:
  case PPC::STXV:
  case PPC::LFD:
  case PPC::LFS:
  case PPC::STFD:
  case PPC::STFS:
  case PPC::DFLOADf32:
  case PPC::DFLOADf64:
  case PPC::DFSTOREf32:
  case PPC::DFSTOREf64:
    return true;
  }
}

class PPCPreEmitPeephole : public MachineFunctionPass {
public:
  static char ID;
  PPCPreEmitPeephole() : MachineFunctionPass(ID) {
    initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
  }

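  // This pass runs after register allocation, so it only ever operates on
  // physical registers (hence the NoVRegs property below).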
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  // This function removes any redundant load immediates. It has two levels of
  // loops: the outer loop finds a load immediate BBI that could be used to
  // replace subsequent redundant ones. The inner loop scans the instructions
  // that come after BBI to find the redundancy and update kill/dead flags
  // accordingly. If AfterBBI loads the same immediate into the same register
  // as BBI, it is redundant; otherwise, any instruction that modifies the def
  // register of BBI stops the scan. DeadOrKillToUnset is a pointer to the
  // previous operand that had the kill/dead flag set; it keeps track of the
  // def register of BBI, the use registers of the AfterBBIs and the def
  // registers of the AfterBBIs.
  bool removeRedundantLIs(MachineBasicBlock &MBB,
                          const TargetRegisterInfo *TRI) {
    LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
               MBB.dump(); dbgs() << "\n");

    DenseSet<MachineInstr *> InstrsToErase;
    for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
      // Skip load immediate that is marked to be erased later because it
      // cannot be used to replace any other instructions.
      if (InstrsToErase.contains(&*BBI))
        continue;
      // Skip non-load immediate.
      unsigned Opc = BBI->getOpcode();
      if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
          Opc != PPC::LIS8)
        continue;
      // Skip load immediate, where the operand is a relocation (e.g., $r3 =
      // LI target-flags(ppc-lo) %const.0).
      if (!BBI->getOperand(1).isImm())
        continue;
      assert(BBI->getOperand(0).isReg() &&
             "Expected a register for the first operand");

      LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););

      Register Reg = BBI->getOperand(0).getReg();
      int64_t Imm = BBI->getOperand(1).getImm();
      MachineOperand *DeadOrKillToUnset = nullptr;
      if (BBI->getOperand(0).isDead()) {
        DeadOrKillToUnset = &BBI->getOperand(0);
        LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
                          << " from load immediate " << *BBI
                          << " is a unsetting candidate\n");
      }
      // This loop scans instructions after BBI to see if there is any
      // redundant load immediate.
      for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
           ++AfterBBI) {
        // Track the operand that kills Reg. We will unset the kill flag of
        // the operand if there is a following redundant load immediate.
        int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);

        // We can't just clear implicit kills, so if we encounter one, stop
        // looking further.
        if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
          LLVM_DEBUG(dbgs()
                     << "Encountered an implicit kill, cannot proceed: ");
          LLVM_DEBUG(AfterBBI->dump());
          break;
        }

        if (KillIdx != -1) {
          assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
          DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
          LLVM_DEBUG(dbgs()
                     << " Kill flag of " << *DeadOrKillToUnset << " from "
                     << *AfterBBI << " is a unsetting candidate\n");
        }

        if (!AfterBBI->modifiesRegister(Reg, TRI))
          continue;
        // Finish scanning because Reg is overwritten by a non-load
        // instruction.
        if (AfterBBI->getOpcode() != Opc)
          break;
        assert(AfterBBI->getOperand(0).isReg() &&
               "Expected a register for the first operand");
        // Finish scanning because Reg is overwritten by a relocation or a
        // different value.
        if (!AfterBBI->getOperand(1).isImm() ||
            AfterBBI->getOperand(1).getImm() != Imm)
          break;

        // AfterBBI loads the same immediate value into the same Reg, which is
        // redundant. We first unset the kill flag on the previous use of Reg
        // to extend Reg's live range, then remove the redundancy.
        if (DeadOrKillToUnset) {
          LLVM_DEBUG(dbgs()
                     << " Unset dead/kill flag of " << *DeadOrKillToUnset
                     << " from " << *DeadOrKillToUnset->getParent());
          if (DeadOrKillToUnset->isDef())
            DeadOrKillToUnset->setIsDead(false);
          else
            DeadOrKillToUnset->setIsKill(false);
        }
        DeadOrKillToUnset =
            AfterBBI->findRegisterDefOperand(Reg, true, true, TRI);
        if (DeadOrKillToUnset)
          LLVM_DEBUG(dbgs()
                     << " Dead flag of " << *DeadOrKillToUnset << " from "
                     << *AfterBBI << " is a unsetting candidate\n");
        InstrsToErase.insert(&*AfterBBI);
        LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
                   AfterBBI->dump());
      }
    }

    for (MachineInstr *MI : InstrsToErase) {
      MI->eraseFromParent();
    }
    NumRemovedInPreEmit += InstrsToErase.size();
    return !InstrsToErase.empty();
  }

  // Check if this instruction is a PLDpc that is part of a GOT indirect
  // access.
  bool isGOTPLDpc(MachineInstr &Instr) {
    if (Instr.getOpcode() != PPC::PLDpc)
      return false;

    // The result must be a register.
    const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
    if (!LoadedAddressReg.isReg())
      return false;

    // Make sure that this is a global symbol.
    const MachineOperand &SymbolOp = Instr.getOperand(1);
    if (!SymbolOp.isGlobal())
      return false;

    // Finally return true only if the GOT flag is present.
    return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG);
  }

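  // Mark each GOT-indirect PLDpc together with the memory access that uses
  // the loaded address, so that the R_PPC64_PCREL_OPT relocation can be
  // emitted and the linker can further optimize the GOT indirect access
  // (e.g. replace the GOT load with a nop).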
  bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
    MachineFunction *MF = MBB.getParent();
    // If the linker opt is disabled then just return.
    if (!EnablePCRelLinkerOpt)
      return false;

    // Add this linker opt only if we are using PC Relative memops.
    if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
      return false;

    // Struct to keep track of one def/use pair for a GOT indirect access.
    struct GOTDefUsePair {
      MachineBasicBlock::iterator DefInst;
      MachineBasicBlock::iterator UseInst;
      Register DefReg;
      Register UseReg;
      bool StillValid;
    };
    // Vector of def/use pairs in this basic block.
    SmallVector<GOTDefUsePair, 4> CandPairs;
    SmallVector<GOTDefUsePair, 4> ValidPairs;
    bool MadeChange = false;

    // Run through all of the instructions in the basic block and try to
    // collect potential pairs of GOT indirect access instructions.
    for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
      // Look for the initial GOT indirect load.
      if (isGOTPLDpc(*BBI)) {
        GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
                                  BBI->getOperand(0).getReg(),
                                  PPC::NoRegister, true};
        CandPairs.push_back(CurrentPair);
        continue;
      }

      // We haven't encountered any new PLD instructions, nothing to check.
      if (CandPairs.empty())
        continue;

      // Run through the candidate pairs and see if any of the registers
      // defined in the PLD instructions are used by this instruction.
      // Note: the size of CandPairs can change in the loop.
      for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
        GOTDefUsePair &Pair = CandPairs[Idx];
        // The instruction does not use or modify this PLD's def reg,
        // ignore it.
        if (!BBI->readsRegister(Pair.DefReg, TRI) &&
            !BBI->modifiesRegister(Pair.DefReg, TRI))
          continue;

        // The use needs to be used in the address computation and not
        // as the register being stored for a store.
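        // For the D-form memory ops accepted by hasPCRelativeForm, operand 2
        // is the base address register.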
        const MachineOperand *UseOp =
            hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;

        // Check for a valid use.
        if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
            UseOp->isUse() && UseOp->isKill()) {
          Pair.UseInst = BBI;
          Pair.UseReg = BBI->getOperand(0).getReg();
          ValidPairs.push_back(Pair);
        }
        CandPairs.erase(CandPairs.begin() + Idx);
      }
    }

    // Go through all of the pairs and check for any more valid uses.
    for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
      // We shouldn't be here if we don't have a valid pair.
      assert(Pair->UseInst.isValid() && Pair->StillValid &&
             "Kept an invalid def/use pair for GOT PCRel opt");
      // We have found a potential pair. Search through the instructions
      // between the def and the use to see if it is valid to mark this as a
      // linker opt.
      MachineBasicBlock::iterator BBI = Pair->DefInst;
      ++BBI;
      for (; BBI != Pair->UseInst; ++BBI) {
        if (BBI->readsRegister(Pair->UseReg, TRI) ||
            BBI->modifiesRegister(Pair->UseReg, TRI)) {
          Pair->StillValid = false;
          break;
        }
      }

      if (!Pair->StillValid)
        continue;

      // The load/store instruction that uses the address from the PLD will
      // either use a register (for a store) or define a register (for the
      // load). That register will be added as an implicit def to the PLD
      // and as an implicit use on the second memory op. This is a precaution
      // to prevent future passes from using that register between the two
      // instructions.
      MachineOperand ImplDef =
          MachineOperand::CreateReg(Pair->UseReg, true, true);
      MachineOperand ImplUse =
          MachineOperand::CreateReg(Pair->UseReg, false, true);
      Pair->DefInst->addOperand(ImplDef);
      Pair->UseInst->addOperand(ImplUse);

      // Create the symbol.
      MCContext &Context = MF->getContext();
      MCSymbol *Symbol = Context.createNamedTempSymbol("pcrel");
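      // The label is attached to both the PLDpc and the memory op so that the
      // relocation marking the optimizable sequence can refer to them, e.g.:
      //   pld 3, vec@got@pcrel(0), 1
      //   .Lpcrel1:
      //   .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
      //   lwa 3, 4(3)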
      MachineOperand PCRelLabel =
          MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
      Pair->DefInst->addOperand(*MF, PCRelLabel);
      Pair->UseInst->addOperand(*MF, PCRelLabel);
      MadeChange |= true;
    }
    return MadeChange;
  }

  // This function removes redundant pairs of accumulator prime/unprime
  // instructions. In some situations, it's possible the compiler inserts an
  // accumulator prime instruction followed by an unprime instruction (e.g.
  // when we store an accumulator after restoring it from a spill). If the
  // accumulator is not used between the two, they can be removed. This
  // function removes these redundant pairs from basic blocks.
  // The algorithm is quite straightforward - every time we encounter a prime
  // instruction, the primed register is added to a candidate set. Any use
  // other than a prime removes the candidate from the set and any de-prime
  // of a current candidate marks both the prime and de-prime for removal.
  // This way we ensure we only remove prime/de-prime *pairs* with no
  // intervening uses.
  bool removeAccPrimeUnprime(MachineBasicBlock &MBB) {
    DenseSet<MachineInstr *> InstrsToErase;
    // Initially, none of the acc registers are candidates.
    SmallVector<MachineInstr *, 8> Candidates(
        PPC::UACCRCRegClass.getNumRegs(), nullptr);
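    // Candidates is indexed by accumulator number (Acc - PPC::ACC0); a
    // non-null entry is the pending prime (XXMTACC) of that accumulator.
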
    for (MachineInstr &BBI : MBB.instrs()) {
      unsigned Opc = BBI.getOpcode();
      // If we are visiting a xxmtacc instruction, we add it and its operand
      // register to the candidate set.
      if (Opc == PPC::XXMTACC) {
        Register Acc = BBI.getOperand(0).getReg();
        assert(PPC::ACCRCRegClass.contains(Acc) &&
               "Unexpected register for XXMTACC");
        Candidates[Acc - PPC::ACC0] = &BBI;
      }
      // If we are visiting a xxmfacc instruction and its operand register is
      // in the candidate set, we mark the two instructions for removal.
      else if (Opc == PPC::XXMFACC) {
        Register Acc = BBI.getOperand(0).getReg();
        assert(PPC::ACCRCRegClass.contains(Acc) &&
               "Unexpected register for XXMFACC");
        if (!Candidates[Acc - PPC::ACC0])
          continue;
        InstrsToErase.insert(&BBI);
        InstrsToErase.insert(Candidates[Acc - PPC::ACC0]);
      }
      // If we are visiting an instruction using an accumulator register
      // as operand, we remove it from the candidate set.
      else {
        for (MachineOperand &Operand : BBI.operands()) {
          if (!Operand.isReg())
            continue;
          Register Reg = Operand.getReg();
          if (PPC::ACCRCRegClass.contains(Reg))
            Candidates[Reg - PPC::ACC0] = nullptr;
        }
      }
    }

    for (MachineInstr *MI : InstrsToErase)
      MI->eraseFromParent();
    NumRemovedInPreEmit += InstrsToErase.size();
    return !InstrsToErase.empty();
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
      // Remove UNENCODED_NOP even when this pass is disabled.
      // This needs to be done unconditionally so we don't emit zeros
      // in the instruction stream.
      SmallVector<MachineInstr *, 4> InstrsToErase;
      for (MachineBasicBlock &MBB : MF)
        for (MachineInstr &MI : MBB)
          if (MI.getOpcode() == PPC::UNENCODED_NOP)
            InstrsToErase.push_back(&MI);
      for (MachineInstr *MI : InstrsToErase)
        MI->eraseFromParent();
      return false;
    }
    bool Changed = false;
    const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    SmallVector<MachineInstr *, 4> InstrsToErase;
    for (MachineBasicBlock &MBB : MF) {
      Changed |= removeRedundantLIs(MBB, TRI);
      Changed |= addLinkerOpt(MBB, TRI);
      Changed |= removeAccPrimeUnprime(MBB);
      for (MachineInstr &MI : MBB) {
        unsigned Opc = MI.getOpcode();
        if (Opc == PPC::UNENCODED_NOP) {
          InstrsToErase.push_back(&MI);
          continue;
        }
        // Detect self copies - these can result from running AADB.
        if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
          const MCInstrDesc &MCID = TII->get(Opc);
          if (MCID.getNumOperands() == 3 &&
              MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
              MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
            NumberOfSelfCopies++;
            LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
            LLVM_DEBUG(MI.dump());
            InstrsToErase.push_back(&MI);
            continue;
          }
          else if (MCID.getNumOperands() == 2 &&
                   MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
            NumberOfSelfCopies++;
            LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
            LLVM_DEBUG(MI.dump());
            InstrsToErase.push_back(&MI);
            continue;
          }
        }
        MachineInstr *DefMIToErase = nullptr;
        if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
          Changed = true;
          NumRRConvertedInPreEmit++;
          LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
          LLVM_DEBUG(MI.dump());
          if (DefMIToErase) {
            InstrsToErase.push_back(DefMIToErase);
          }
        }
        if (TII->foldFrameOffset(MI)) {
          Changed = true;
          NumFrameOffFoldInPreEmit++;
          LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
          LLVM_DEBUG(MI.dump());
        }
      }

      // Eliminate a conditional branch based on a CR bit that is known
      // constant because it was set by CRSET or CRUNSET. We either remove
      // the conditional branch (if it can never be taken) or convert it
      // into an unconditional branch (if it is always taken). Also, if the
      // CR bit is not used by other instructions, we eliminate the
      // CRSET/CRUNSET as well.
      auto I = MBB.getFirstInstrTerminator();
      if (I == MBB.instr_end())
        continue;
      MachineInstr *Br = &*I;
      if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
        continue;
      MachineInstr *CRSetMI = nullptr;
      Register CRBit = Br->getOperand(0).getReg();
      unsigned CRReg = getCRFromCRBit(CRBit);
      bool SeenUse = false;
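      // Scan backwards from the branch for the instruction that defines
      // CRBit; remember it if it is a CRSET/CRUNSET of exactly that bit, and
      // note whether CRBit is read anywhere in between.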
      MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
      for (It++; It != Er; It++) {
        if (It->modifiesRegister(CRBit, TRI)) {
          if ((It->getOpcode() == PPC::CRUNSET ||
               It->getOpcode() == PPC::CRSET) &&
              It->getOperand(0).getReg() == CRBit)
            CRSetMI = &*It;
          break;
        }
        if (It->readsRegister(CRBit, TRI))
          SeenUse = true;
      }
      if (!CRSetMI) continue;

      unsigned CRSetOp = CRSetMI->getOpcode();
      if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
          (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) {
        // Remove this branch since it cannot be taken.
        InstrsToErase.push_back(Br);
        MBB.removeSuccessor(Br->getOperand(1).getMBB());
      }
      else {
        // This conditional branch is always taken. So, remove all branches
        // and insert an unconditional branch to its destination.
        MachineBasicBlock::iterator It = Br, Er = MBB.end();
        for (; It != Er; It++) {
          if (It->isDebugInstr()) continue;
          assert(It->isTerminator() && "Non-terminator after a terminator");
          InstrsToErase.push_back(&*It);
        }
        if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
          ArrayRef<MachineOperand> NoCond;
          TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
                            NoCond, Br->getDebugLoc());
        }
        for (auto &Succ : MBB.successors())
          if (Succ != Br->getOperand(1).getMBB()) {
            MBB.removeSuccessor(Succ);
            break;
          }
      }

      // If the CRBit is not used by another instruction, we can eliminate
      // the CRSET/CRUNSET instruction.
      if (!SeenUse) {
        // We need to check use of the CRBit in successors.
        for (auto &SuccMBB : MBB.successors())
          if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
            SeenUse = true;
            break;
          }
        if (!SeenUse)
          InstrsToErase.push_back(CRSetMI);
      }
    }
    for (MachineInstr *MI : InstrsToErase) {
      LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
      LLVM_DEBUG(MI->dump());
      MI->eraseFromParent();
      NumRemovedInPreEmit++;
    }
    return Changed;
  }
};
}

INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole",
                false, false)
char PPCPreEmitPeephole::ID = 0;

FunctionPass *llvm::createPPCPreEmitPeepholePass() {
  return new PPCPreEmitPeephole();
}