//=== lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;
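
// Match data for turning a select of a floating-point compare into one of the
// legacy min/max operations.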
struct FMinFMaxLegacyInfo {
  Register LHS;
  Register RHS;
  Register True;
  Register False;
  CmpInst::Predicate Pred;
};
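
// Match a G_SELECT whose condition is a G_FCMP, with one non-debug use, that
// compares the same two values the select chooses between. Such a select can
// be lowered to one of the legacy min/max operations.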
// TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
                                MachineFunction &MF, FMinFMaxLegacyInfo &Info) {
  // FIXME: Combines should have subtarget predicates, and we shouldn't need
  // this here.
  if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
    return false;

  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  // Predicates that only test for equality, inequality or orderedness have no
  // min/max equivalent.
  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}
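
// Lower the matched select to G_AMDGPU_FMIN_LEGACY/G_AMDGPU_FMAX_LEGACY.
// For example:
//   %c:_(s1) = G_FCMP floatpred(ult), %a, %b
//   %r:_(s32) = G_SELECT %c, %a, %b
// becomes
//   %r:_(s32) = G_AMDGPU_FMIN_LEGACY %b, %a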
static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                              const FMinFMaxLegacyInfo &Info) {
  auto buildNewInst = [&MI](unsigned Opc, Register X, Register Y) {
    MachineIRBuilder MIB(MI);
    MIB.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // legacy min/max operations select their second operand when the compare
    // fails (i.e. when an input is NaN), so order the operands to match the
    // compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}
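
// Match a conversion to f32 or f16 whose integer source is known to have all
// bits above the low 8 clear, so it can be lowered to the hardware's
// byte-to-float conversion.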
static bool matchUCharToFloat(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineFunction &MF, CombinerHelper &Helper) {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}
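
// Rewrite the conversion as G_AMDGPU_CVT_F32_UBYTE0, extending or truncating
// the source to 32 bits first if necessary, and narrowing an f16 result from
// f32 with G_FPTRUNC.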
static void applyUCharToFloat(MachineInstr &MI) {
  MachineIRBuilder B(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = B.getMRI()->getType(DstReg);
  LLT SrcTy = B.getMRI()->getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
                 {SrcReg}, MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
                             {SrcReg}, MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

// FIXME: Should be able to have 2 separate matchdatas rather than custom struct
// boilerplate.
struct CvtF32UByteMatchInfo {
  Register CvtVal;
  unsigned ShiftOffset;
};
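
// Fold a constant left/right shift of the source (looking through any G_ZEXT)
// into the byte index of a G_AMDGPU_CVT_F32_UBYTEn instruction. For example:
//   %shift:_(s32) = G_LSHR %x, 16
//   %cvt:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift
// reads byte 2 of %x, so it can be rewritten as
//   %cvt:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %x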
static bool matchCvtF32UByteN(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineFunction &MF,
                              CvtF32UByteMatchInfo &MatchInfo) {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    // A right shift means the conversion reads a higher byte of the unshifted
    // value; a left shift, a lower one.
    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    // The new byte offset must be byte aligned and in the range [8, 32).
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}
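
// Rewrite the conversion to the G_AMDGPU_CVT_F32_UBYTEn opcode that reads the
// selected byte directly from the unshifted source.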
static void applyCvtF32UByteN(MachineInstr &MI,
                              const CvtF32UByteMatchInfo &MatchInfo) {
  MachineIRBuilder B(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = B.getMRI()->getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;

public:
  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

  AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  const AMDGPULegalizerInfo *LI,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};
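
// Try the generated combine rules first; if none of them fire, fall back to
// the manually implemented combines.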
bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
  AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg);

  if (Generated.tryCombineAll(Observer, MI, B, Helper))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}
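
// Entry point: build the combiner from the analyses requested above and run
// it over the function. Bail out if instruction selection already failed.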
bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI
      = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), LI, KB, MDT);
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm