llvm-project/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp

162 lines
5.3 KiB
C++
Raw Normal View History

//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The QPX vector registers overlay the scalar floating-point registers, and
// any scalar floating-point loads splat their value across all vector lanes.
// Thus, if we have a scalar load followed by a splat, we can remove the splat
// (i.e. replace the load with a load-and-splat pseudo instruction).
//
// This pass must run after anything that might do store-to-load forwarding.
//
//===----------------------------------------------------------------------===//
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "ppc-qpx-load-splat"
STATISTIC(NumSimplified, "Number of QPX load splats simplified");
namespace {

/// Machine-function pass that simplifies QPX load/splat pairs: a scalar
/// floating-point load followed by a splat of the loaded value is reduced to
/// the load alone (the work is done in runOnMachineFunction).
struct PPCQPXLoadSplat : public MachineFunctionPass {
  /// Pass identification object handed to the MachineFunctionPass base.
  static char ID;

  /// Constructs the pass and makes sure it is registered with the global
  /// PassRegistry.
  PPCQPXLoadSplat() : MachineFunctionPass(ID) {
    initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
  }

  /// Runs the simplification over one machine function; the return value
  /// reports whether any instruction was changed.
  bool runOnMachineFunction(MachineFunction &Fn) override;

  /// Human-readable name of this pass.
  StringRef getPassName() const override {
    return "PowerPC QPX Load Splat Simplification";
  }
};

char PPCQPXLoadSplat::ID = 0;

} // end anonymous namespace
// Register the pass under the argument name "ppc-qpx-load-splat".
INITIALIZE_PASS(PPCQPXLoadSplat,
                "ppc-qpx-load-splat",
                "PowerPC QPX Load Splat Simplification",
                /*cfg=*/false,
                /*analysis=*/false)
/// Factory for the QPX load-splat simplification pass. Returns a newly
/// heap-allocated PPCQPXLoadSplat instance as a FunctionPass pointer.
FunctionPass *llvm::createPPCQPXLoadSplatPass() {
  FunctionPass *NewPass = new PPCQPXLoadSplat();
  return NewPass;
}
bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
bool MadeChange = false;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
MachineBasicBlock *MBB = &*MFI;
SmallVector<MachineInstr *, 4> Splats;
for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
MachineInstr *MI = &*MBBI;
if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
Splats.clear();
continue;
}
// We're looking for a sequence like this:
[CodeGen] Use MachineOperand::print in the MIRPrinter for MO_Register. Work towards the unification of MIR and debug output by refactoring the interfaces. For MachineOperand::print, keep a simple version that can be easily called from `dump()`, and a more complex one which will be called from both the MIRPrinter and MachineInstr::print. Add extra checks inside MachineOperand for detached operands (operands with getParent() == nullptr). https://reviews.llvm.org/D40836 * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/kill: ([^ ]+) ([^ ]+)<def> ([^ ]+)/kill: \1 def \2 \3/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/kill: ([^ ]+) ([^ ]+) ([^ ]+)<def>/kill: \1 \2 def \3/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/kill: def ([^ ]+) ([^ ]+) ([^ ]+)<def>/kill: def \1 \2 def \3/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/<def>//g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<kill>/killed \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<imp-use,kill>/implicit killed \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<dead>/dead \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<def[ ]*,[ ]*dead>/dead \1/g' * find . 
\( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<imp-def[ ]*,[ ]*dead>/implicit-def dead \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<imp-def>/implicit-def \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<imp-use>/implicit \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<internal>/internal \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<undef>/undef \1/g' llvm-svn: 320022
2017-12-07 18:40:31 +08:00
// %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
// %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
for (auto SI = Splats.begin(); SI != Splats.end();) {
MachineInstr *SMI = *SI;
unsigned SplatReg = SMI->getOperand(0).getReg();
unsigned SrcReg = SMI->getOperand(1).getReg();
if (MI->modifiesRegister(SrcReg, TRI)) {
switch (MI->getOpcode()) {
default:
SI = Splats.erase(SI);
continue;
case PPC::LFS:
case PPC::LFD:
case PPC::LFSU:
case PPC::LFDU:
case PPC::LFSUX:
case PPC::LFDUX:
case PPC::LFSX:
case PPC::LFDX:
case PPC::LFIWAX:
case PPC::LFIWZX:
if (SplatReg != SrcReg) {
// We need to change the load to define the scalar subregister of
// the QPX splat source register.
unsigned SubRegIndex =
TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
// Substitute both the explicit defined register, and also the
// implicit def of the containing QPX register.
MI->getOperand(0).setReg(SplatSubReg);
MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
}
SI = Splats.erase(SI);
// If SMI is directly after MI, then MBBI's base iterator is
// pointing at SMI. Adjust MBBI around the call to erase SMI to
// avoid invalidating MBBI.
++MBBI;
SMI->eraseFromParent();
--MBBI;
++NumSimplified;
MadeChange = true;
continue;
}
}
2016-06-20 08:37:41 +08:00
// If this instruction defines the splat register, then we cannot move
// the previous definition above it. If it reads from the splat
// register, then it must already be alive from some previous
// definition, and if the splat register is different from the source
// register, then this definition must not be the load for which we're
// searching.
if (MI->modifiesRegister(SplatReg, TRI) ||
(SrcReg != SplatReg &&
MI->readsRegister(SplatReg, TRI))) {
SI = Splats.erase(SI);
continue;
}
++SI;
}
if (MI->getOpcode() != PPC::QVESPLATI &&
MI->getOpcode() != PPC::QVESPLATIs &&
MI->getOpcode() != PPC::QVESPLATIb)
continue;
if (MI->getOperand(2).getImm() != 0)
continue;
// If there are other uses of the scalar value after this, replacing
// those uses might be non-trivial.
if (!MI->getOperand(1).isKill())
continue;
Splats.push_back(MI);
}
}
return MadeChange;
}