// Provenance: forked from OSchip/llvm-project (167 lines, 5.4 KiB, C++).
//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The QPX vector registers overlay the scalar floating-point registers, and
// any scalar floating-point loads splat their value across all vector lanes.
// Thus, if we have a scalar load followed by a splat of lane 0, we can remove
// the splat (i.e. make the load define the splat's destination register
// directly and delete the splat instruction).
//
// This pass must run after anything that might do store-to-load forwarding.
//
//===----------------------------------------------------------------------===//
|
|
|
|
#include "PPC.h"
|
|
#include "PPCInstrBuilder.h"
|
|
#include "PPCInstrInfo.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
|
#include "llvm/Support/MathExtras.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "ppc-qpx-load-splat"
|
|
|
|
STATISTIC(NumSimplified, "Number of QPX load splats simplified");
|
|
|
|
namespace llvm {
|
|
void initializePPCQPXLoadSplatPass(PassRegistry&);
|
|
}
|
|
|
|
namespace {
|
|
struct PPCQPXLoadSplat : public MachineFunctionPass {
|
|
static char ID;
|
|
PPCQPXLoadSplat() : MachineFunctionPass(ID) {
|
|
initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
bool runOnMachineFunction(MachineFunction &Fn) override;
|
|
|
|
StringRef getPassName() const override {
|
|
return "PowerPC QPX Load Splat Simplification";
|
|
}
|
|
};
|
|
char PPCQPXLoadSplat::ID = 0;
|
|
}
|
|
|
|
// Registers the pass under the command-line name "ppc-qpx-load-splat".
// The two trailing flags are the macro's "CFG only" and "is analysis"
// arguments; both are false for this transformation pass.
INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
                "PowerPC QPX Load Splat Simplification",
                false, false)
|
|
|
|
/// Factory for the QPX load-splat simplification pass.
/// \returns a newly allocated pass instance; the caller takes ownership.
FunctionPass *llvm::createPPCQPXLoadSplatPass() {
  return new PPCQPXLoadSplat();
}
|
|
|
|
bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
|
|
if (skipFunction(*MF.getFunction()))
|
|
return false;
|
|
|
|
bool MadeChange = false;
|
|
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
|
|
|
|
for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
|
|
MachineBasicBlock *MBB = &*MFI;
|
|
SmallVector<MachineInstr *, 4> Splats;
|
|
|
|
for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
|
|
MachineInstr *MI = &*MBBI;
|
|
|
|
if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
|
|
Splats.clear();
|
|
continue;
|
|
}
|
|
|
|
// We're looking for a sequence like this:
|
|
// %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
|
|
// %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
|
|
|
|
for (auto SI = Splats.begin(); SI != Splats.end();) {
|
|
MachineInstr *SMI = *SI;
|
|
unsigned SplatReg = SMI->getOperand(0).getReg();
|
|
unsigned SrcReg = SMI->getOperand(1).getReg();
|
|
|
|
if (MI->modifiesRegister(SrcReg, TRI)) {
|
|
switch (MI->getOpcode()) {
|
|
default:
|
|
SI = Splats.erase(SI);
|
|
continue;
|
|
case PPC::LFS:
|
|
case PPC::LFD:
|
|
case PPC::LFSU:
|
|
case PPC::LFDU:
|
|
case PPC::LFSUX:
|
|
case PPC::LFDUX:
|
|
case PPC::LFSX:
|
|
case PPC::LFDX:
|
|
case PPC::LFIWAX:
|
|
case PPC::LFIWZX:
|
|
if (SplatReg != SrcReg) {
|
|
// We need to change the load to define the scalar subregister of
|
|
// the QPX splat source register.
|
|
unsigned SubRegIndex =
|
|
TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
|
|
unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
|
|
|
|
// Substitute both the explicit defined register, and also the
|
|
// implicit def of the containing QPX register.
|
|
MI->getOperand(0).setReg(SplatSubReg);
|
|
MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
|
|
}
|
|
|
|
SI = Splats.erase(SI);
|
|
|
|
// If SMI is directly after MI, then MBBI's base iterator is
|
|
// pointing at SMI. Adjust MBBI around the call to erase SMI to
|
|
// avoid invalidating MBBI.
|
|
++MBBI;
|
|
SMI->eraseFromParent();
|
|
--MBBI;
|
|
|
|
++NumSimplified;
|
|
MadeChange = true;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// If this instruction defines the splat register, then we cannot move
|
|
// the previous definition above it. If it reads from the splat
|
|
// register, then it must already be alive from some previous
|
|
// definition, and if the splat register is different from the source
|
|
// register, then this definition must not be the load for which we're
|
|
// searching.
|
|
if (MI->modifiesRegister(SplatReg, TRI) ||
|
|
(SrcReg != SplatReg &&
|
|
MI->readsRegister(SplatReg, TRI))) {
|
|
SI = Splats.erase(SI);
|
|
continue;
|
|
}
|
|
|
|
++SI;
|
|
}
|
|
|
|
if (MI->getOpcode() != PPC::QVESPLATI &&
|
|
MI->getOpcode() != PPC::QVESPLATIs &&
|
|
MI->getOpcode() != PPC::QVESPLATIb)
|
|
continue;
|
|
if (MI->getOperand(2).getImm() != 0)
|
|
continue;
|
|
|
|
// If there are other uses of the scalar value after this, replacing
|
|
// those uses might be non-trivial.
|
|
if (!MI->getOperand(1).isKill())
|
|
continue;
|
|
|
|
Splats.push_back(MI);
|
|
}
|
|
}
|
|
|
|
return MadeChange;
|
|
}
|