forked from OSchip/llvm-project
[AMDGPU] Pass to propagate ABI attributes from kernels to the functions
The pass works in two modes: Mode 1: Just set attributes starting from kernels. This can work at the very beginning of opt and llc pipeline, but cannot clone functions because it must be a function pass. Mode 2: Actually clone functions for new attributes. This can only work after all function passes in the opt pipeline because it has to be a module pass. Differential Revision: https://reviews.llvm.org/D63208 llvm-svn: 363586
This commit is contained in:
parent
b8e8b1769f
commit
ad04e7ad42
|
@ -57,6 +57,8 @@ FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &);
|
||||||
FunctionPass *createAMDGPUUseNativeCallsPass();
|
FunctionPass *createAMDGPUUseNativeCallsPass();
|
||||||
FunctionPass *createAMDGPUCodeGenPreparePass();
|
FunctionPass *createAMDGPUCodeGenPreparePass();
|
||||||
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
|
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
|
||||||
|
FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *);
|
||||||
|
ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *);
|
||||||
FunctionPass *createAMDGPURewriteOutArgumentsPass();
|
FunctionPass *createAMDGPURewriteOutArgumentsPass();
|
||||||
FunctionPass *createSIModeRegisterPass();
|
FunctionPass *createSIModeRegisterPass();
|
||||||
|
|
||||||
|
@ -91,6 +93,12 @@ ModulePass *createAMDGPULowerKernelAttributesPass();
|
||||||
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
|
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
|
||||||
extern char &AMDGPULowerKernelAttributesID;
|
extern char &AMDGPULowerKernelAttributesID;
|
||||||
|
|
||||||
|
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
|
||||||
|
extern char &AMDGPUPropagateAttributesEarlyID;
|
||||||
|
|
||||||
|
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &);
|
||||||
|
extern char &AMDGPUPropagateAttributesLateID;
|
||||||
|
|
||||||
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
|
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
|
||||||
extern char &AMDGPURewriteOutArgumentsID;
|
extern char &AMDGPURewriteOutArgumentsID;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,336 @@
|
||||||
|
//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
/// \file
|
||||||
|
/// \brief This pass propagates attributes from kernels to the non-entry
|
||||||
|
/// functions. Most of the library functions were not compiled for specific ABI,
|
||||||
|
/// yet will be correctly compiled if proper attributes are propagated from the
|
||||||
|
/// caller.
|
||||||
|
///
|
||||||
|
/// The pass analyzes call graph and propagates ABI target features through the
|
||||||
|
/// call graph.
|
||||||
|
///
|
||||||
|
/// It can run in two modes: as a function or module pass. A function pass
|
||||||
|
/// simply propagates attributes. A module pass clones functions if there are
|
||||||
|
/// callers with different ABI. If a function is cloned all call sites will
|
||||||
|
/// be updated to use a correct clone.
|
||||||
|
///
|
||||||
|
/// A function pass is limited in functionality but can run early in the
|
||||||
|
/// pipeline. A module pass is more powerful but has to run late, so misses
|
||||||
|
/// library folding opportunities.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#define DEBUG_TYPE "amdgpu-propagate-attributes"
|
||||||
|
|
||||||
|
#include "AMDGPU.h"
|
||||||
|
#include "AMDGPUSubtarget.h"
|
||||||
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||||
|
#include "Utils/AMDGPUBaseInfo.h"
|
||||||
|
#include "llvm/ADT/SmallSet.h"
|
||||||
|
#include "llvm/ADT/SmallVector.h"
|
||||||
|
#include "llvm/IR/Function.h"
|
||||||
|
#include "llvm/IR/Module.h"
|
||||||
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
#include "llvm/Transforms/Utils/Cloning.h"
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class AMDGPUPropagateAttributes {
|
||||||
|
const FeatureBitset TargetFeatures = {
|
||||||
|
AMDGPU::FeatureWavefrontSize16,
|
||||||
|
AMDGPU::FeatureWavefrontSize32,
|
||||||
|
AMDGPU::FeatureWavefrontSize64
|
||||||
|
};
|
||||||
|
|
||||||
|
class Clone{
|
||||||
|
public:
|
||||||
|
Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
|
||||||
|
FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
|
||||||
|
|
||||||
|
FeatureBitset FeatureMask;
|
||||||
|
Function *OrigF;
|
||||||
|
Function *NewF;
|
||||||
|
};
|
||||||
|
|
||||||
|
const TargetMachine *TM;
|
||||||
|
|
||||||
|
// Clone functions as needed or just set attributes.
|
||||||
|
bool AllowClone;
|
||||||
|
|
||||||
|
// Option propagation roots.
|
||||||
|
SmallSet<Function *, 32> Roots;
|
||||||
|
|
||||||
|
// Clones of functions with their attributes.
|
||||||
|
SmallVector<Clone, 32> Clones;
|
||||||
|
|
||||||
|
// Find a clone with required features.
|
||||||
|
Function *findFunction(const FeatureBitset &FeaturesNeeded,
|
||||||
|
Function *OrigF);
|
||||||
|
|
||||||
|
// Clone function F and set NewFeatures on the clone.
|
||||||
|
// Cole takes the name of original function.
|
||||||
|
Function *cloneWithFeatures(Function &F,
|
||||||
|
const FeatureBitset &NewFeatures);
|
||||||
|
|
||||||
|
// Set new function's features in place.
|
||||||
|
void setFeatures(Function &F, const FeatureBitset &NewFeatures);
|
||||||
|
|
||||||
|
std::string getFeatureString(const FeatureBitset &Features) const;
|
||||||
|
|
||||||
|
// Propagate attributes from Roots.
|
||||||
|
bool process();
|
||||||
|
|
||||||
|
public:
|
||||||
|
AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
|
||||||
|
TM(TM), AllowClone(AllowClone) {}
|
||||||
|
|
||||||
|
// Use F as a root and propagate its attributes.
|
||||||
|
bool process(Function &F);
|
||||||
|
|
||||||
|
// Propagate attributes starting from kernel functions.
|
||||||
|
bool process(Module &M);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Allows to propagate attributes early, but no clonning is allowed as it must
|
||||||
|
// be a function pass to run before any optimizations.
|
||||||
|
// TODO: We shall only need a one instance of module pass, but that needs to be
|
||||||
|
// in the linker pipeline which is currently not possible.
|
||||||
|
class AMDGPUPropagateAttributesEarly : public FunctionPass {
|
||||||
|
const TargetMachine *TM;
|
||||||
|
|
||||||
|
public:
|
||||||
|
static char ID; // Pass identification
|
||||||
|
|
||||||
|
AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
|
||||||
|
FunctionPass(ID), TM(TM) {
|
||||||
|
initializeAMDGPUPropagateAttributesEarlyPass(
|
||||||
|
*PassRegistry::getPassRegistry());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool runOnFunction(Function &F) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Allows to propagate attributes with clonning but does that late in the
|
||||||
|
// pipeline.
|
||||||
|
class AMDGPUPropagateAttributesLate : public ModulePass {
|
||||||
|
const TargetMachine *TM;
|
||||||
|
|
||||||
|
public:
|
||||||
|
static char ID; // Pass identification
|
||||||
|
|
||||||
|
AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
|
||||||
|
ModulePass(ID), TM(TM) {
|
||||||
|
initializeAMDGPUPropagateAttributesLatePass(
|
||||||
|
*PassRegistry::getPassRegistry());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool runOnModule(Module &M) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // end anonymous namespace.
|
||||||
|
|
||||||
|
char AMDGPUPropagateAttributesEarly::ID = 0;
|
||||||
|
char AMDGPUPropagateAttributesLate::ID = 0;
|
||||||
|
|
||||||
|
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
|
||||||
|
"amdgpu-propagate-attributes-early",
|
||||||
|
"Early propagate attributes from kernels to functions",
|
||||||
|
false, false)
|
||||||
|
INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
|
||||||
|
"amdgpu-propagate-attributes-late",
|
||||||
|
"Late propagate attributes from kernels to functions",
|
||||||
|
false, false)
|
||||||
|
|
||||||
|
// Return the recorded clone of OrigF whose feature mask equals FeaturesNeeded,
// or nullptr if no such clone exists.
Function *
AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
                                        Function *OrigF) {
  // TODO: search for clone's clones.
  for (Clone &Candidate : Clones) {
    if (Candidate.OrigF != OrigF)
      continue;
    if (FeaturesNeeded == Candidate.FeatureMask)
      return Candidate.NewF;
  }

  return nullptr;
}
|
||||||
|
|
||||||
|
// Collect every function of M with an entry-function calling convention as a
// root, then run the propagation. Returns true if anything was changed.
bool AMDGPUPropagateAttributes::process(Module &M) {
  for (Function &Fn : M.functions()) {
    if (!AMDGPU::isEntryFunctionCC(Fn.getCallingConv()))
      continue;
    Roots.insert(&Fn);
  }

  return process();
}
|
||||||
|
|
||||||
|
// Register F as a propagation root and run the propagation from the current
// set of roots. Returns true if anything was changed.
bool AMDGPUPropagateAttributes::process(Function &F) {
  Function *Root = &F;
  Roots.insert(Root);
  return process();
}
|
||||||
|
|
||||||
|
bool AMDGPUPropagateAttributes::process() {
|
||||||
|
bool Changed = false;
|
||||||
|
SmallSet<Function *, 32> NewRoots;
|
||||||
|
SmallSet<Function *, 32> Replaced;
|
||||||
|
|
||||||
|
if (Roots.empty())
|
||||||
|
return false;
|
||||||
|
Module &M = *(*Roots.begin())->getParent();
|
||||||
|
|
||||||
|
do {
|
||||||
|
Roots.insert(NewRoots.begin(), NewRoots.end());
|
||||||
|
NewRoots.clear();
|
||||||
|
|
||||||
|
for (auto &F : M.functions()) {
|
||||||
|
if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const FeatureBitset &CalleeBits =
|
||||||
|
TM->getSubtargetImpl(F)->getFeatureBits();
|
||||||
|
SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
|
||||||
|
|
||||||
|
for (User *U : F.users()) {
|
||||||
|
Instruction *I = dyn_cast<Instruction>(U);
|
||||||
|
if (!I)
|
||||||
|
continue;
|
||||||
|
CallBase *CI = dyn_cast<CallBase>(I);
|
||||||
|
if (!CI)
|
||||||
|
continue;
|
||||||
|
Function *Caller = CI->getCaller();
|
||||||
|
if (!Caller)
|
||||||
|
continue;
|
||||||
|
if (!Roots.count(Caller))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const FeatureBitset &CallerBits =
|
||||||
|
TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
|
||||||
|
|
||||||
|
if (CallerBits == (CalleeBits & TargetFeatures)) {
|
||||||
|
NewRoots.insert(&F);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
Function *NewF = findFunction(CallerBits, &F);
|
||||||
|
if (!NewF) {
|
||||||
|
FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
|
||||||
|
CallerBits);
|
||||||
|
if (!AllowClone) {
|
||||||
|
// This may set different features on different iteartions if
|
||||||
|
// there is a contradiction in callers' attributes. In this case
|
||||||
|
// we rely on a second pass running on Module, which is allowed
|
||||||
|
// to clone.
|
||||||
|
setFeatures(F, NewFeatures);
|
||||||
|
NewRoots.insert(&F);
|
||||||
|
Changed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
NewF = cloneWithFeatures(F, NewFeatures);
|
||||||
|
Clones.push_back(Clone(CallerBits, &F, NewF));
|
||||||
|
NewRoots.insert(NewF);
|
||||||
|
}
|
||||||
|
|
||||||
|
ToReplace.push_back(std::make_pair(CI, NewF));
|
||||||
|
Replaced.insert(&F);
|
||||||
|
|
||||||
|
Changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (!ToReplace.empty()) {
|
||||||
|
auto R = ToReplace.pop_back_val();
|
||||||
|
R.first->setCalledFunction(R.second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (!NewRoots.empty());
|
||||||
|
|
||||||
|
for (Function *F : Replaced) {
|
||||||
|
if (F->use_empty())
|
||||||
|
F->eraseFromParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
return Changed;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clone F, set NewFeatures on the clone and return it.
//
// If F is named, the two functions swap names so that the clone carries the
// original name; F is then given default visibility and internal linkage.
Function *
AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
                                             const FeatureBitset &NewFeatures) {
  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');

  // The value map is required by CloneFunction; its contents are not used.
  ValueToValueMapTy VMap;
  Function *NewF = CloneFunction(&F, VMap);
  setFeatures(*NewF, NewFeatures);

  // Swap names. If that is the only clone it will retain the name of now
  // dead value.
  if (F.hasName()) {
    // Copy the clone's current name before takeName() replaces it. The
    // conversion is spelled out explicitly via str().
    std::string NewName = NewF->getName().str();
    NewF->takeName(&F);
    F.setName(NewName);

    // Name has changed, it does not need an external symbol.
    F.setVisibility(GlobalValue::DefaultVisibility);
    F.setLinkage(GlobalValue::InternalLinkage);
  }

  return NewF;
}
|
||||||
|
|
||||||
|
// Replace the "target-features" attribute of F with the string built from
// NewFeatures.
void AMDGPUPropagateAttributes::setFeatures(Function &F,
                                            const FeatureBitset &NewFeatures) {
  LLVM_DEBUG(dbgs() << "Set features "
                    << getFeatureString(NewFeatures & TargetFeatures)
                    << " on " << F.getName() << '\n');

  const std::string FeatureStr = getFeatureString(NewFeatures);

  // Drop the old attribute first, then attach the freshly built one.
  F.removeFnAttr("target-features");
  F.addFnAttr("target-features", FeatureStr);
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
|
||||||
|
{
|
||||||
|
std::string Ret;
|
||||||
|
for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
|
||||||
|
if (Features[KV.Value])
|
||||||
|
Ret += (StringRef("+") + KV.Key + ",").str();
|
||||||
|
else if (TargetFeatures[KV.Value])
|
||||||
|
Ret += (StringRef("-") + KV.Key + ",").str();
|
||||||
|
}
|
||||||
|
Ret.pop_back(); // Remove last comma.
|
||||||
|
return Ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the non-cloning propagation with F as the only root. Nothing is done
// without a target machine or when F does not have an entry-function calling
// convention.
bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
  if (!TM)
    return false;
  if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    return false;

  AMDGPUPropagateAttributes Impl(TM, /*AllowClone=*/false);
  return Impl.process(F);
}
|
||||||
|
|
||||||
|
// Run the cloning propagation over the whole module M. Nothing is done without
// a target machine.
bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
  if (TM) {
    AMDGPUPropagateAttributes Impl(TM, /*AllowClone=*/true);
    return Impl.process(M);
  }

  return false;
}
|
||||||
|
|
||||||
|
// Create a new instance of the early (function pass) attribute propagation
// for the given target machine.
FunctionPass *
llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
  FunctionPass *Pass = new AMDGPUPropagateAttributesEarly(TM);
  return Pass;
}
|
||||||
|
|
||||||
|
// Create a new instance of the late (module pass) attribute propagation for
// the given target machine.
ModulePass *
llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
  ModulePass *Pass = new AMDGPUPropagateAttributesLate(TM);
  return Pass;
}
|
|
@ -217,6 +217,8 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
|
||||||
initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
|
initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
|
||||||
initializeAMDGPUPromoteAllocaPass(*PR);
|
initializeAMDGPUPromoteAllocaPass(*PR);
|
||||||
initializeAMDGPUCodeGenPreparePass(*PR);
|
initializeAMDGPUCodeGenPreparePass(*PR);
|
||||||
|
initializeAMDGPUPropagateAttributesEarlyPass(*PR);
|
||||||
|
initializeAMDGPUPropagateAttributesLatePass(*PR);
|
||||||
initializeAMDGPURewriteOutArgumentsPass(*PR);
|
initializeAMDGPURewriteOutArgumentsPass(*PR);
|
||||||
initializeAMDGPUUnifyMetadataPass(*PR);
|
initializeAMDGPUUnifyMetadataPass(*PR);
|
||||||
initializeSIAnnotateControlFlowPass(*PR);
|
initializeSIAnnotateControlFlowPass(*PR);
|
||||||
|
@ -402,13 +404,14 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
|
||||||
|
|
||||||
Builder.addExtension(
|
Builder.addExtension(
|
||||||
PassManagerBuilder::EP_ModuleOptimizerEarly,
|
PassManagerBuilder::EP_ModuleOptimizerEarly,
|
||||||
[Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
|
[Internalize, EarlyInline, AMDGPUAA, this](const PassManagerBuilder &,
|
||||||
legacy::PassManagerBase &PM) {
|
legacy::PassManagerBase &PM) {
|
||||||
if (AMDGPUAA) {
|
if (AMDGPUAA) {
|
||||||
PM.add(createAMDGPUAAWrapperPass());
|
PM.add(createAMDGPUAAWrapperPass());
|
||||||
PM.add(createAMDGPUExternalAAWrapperPass());
|
PM.add(createAMDGPUExternalAAWrapperPass());
|
||||||
}
|
}
|
||||||
PM.add(createAMDGPUUnifyMetadataPass());
|
PM.add(createAMDGPUUnifyMetadataPass());
|
||||||
|
PM.add(createAMDGPUPropagateAttributesLatePass(this));
|
||||||
if (Internalize) {
|
if (Internalize) {
|
||||||
PM.add(createInternalizePass(mustPreserveGV));
|
PM.add(createInternalizePass(mustPreserveGV));
|
||||||
PM.add(createGlobalDCEPass());
|
PM.add(createGlobalDCEPass());
|
||||||
|
@ -420,12 +423,13 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
|
||||||
const auto &Opt = Options;
|
const auto &Opt = Options;
|
||||||
Builder.addExtension(
|
Builder.addExtension(
|
||||||
PassManagerBuilder::EP_EarlyAsPossible,
|
PassManagerBuilder::EP_EarlyAsPossible,
|
||||||
[AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
|
[AMDGPUAA, LibCallSimplify, &Opt, this](const PassManagerBuilder &,
|
||||||
legacy::PassManagerBase &PM) {
|
legacy::PassManagerBase &PM) {
|
||||||
if (AMDGPUAA) {
|
if (AMDGPUAA) {
|
||||||
PM.add(createAMDGPUAAWrapperPass());
|
PM.add(createAMDGPUAAWrapperPass());
|
||||||
PM.add(createAMDGPUExternalAAWrapperPass());
|
PM.add(createAMDGPUExternalAAWrapperPass());
|
||||||
}
|
}
|
||||||
|
PM.add(llvm::createAMDGPUPropagateAttributesEarlyPass(this));
|
||||||
PM.add(llvm::createAMDGPUUseNativeCallsPass());
|
PM.add(llvm::createAMDGPUUseNativeCallsPass());
|
||||||
if (LibCallSimplify)
|
if (LibCallSimplify)
|
||||||
PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
|
PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
|
||||||
|
@ -654,6 +658,9 @@ void AMDGPUPassConfig::addIRPasses() {
|
||||||
disablePass(&FuncletLayoutID);
|
disablePass(&FuncletLayoutID);
|
||||||
disablePass(&PatchableFunctionID);
|
disablePass(&PatchableFunctionID);
|
||||||
|
|
||||||
|
// A call to propagate attributes pass in the backend in case opt was not run.
|
||||||
|
addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));
|
||||||
|
|
||||||
addPass(createAtomicExpandPass());
|
addPass(createAtomicExpandPass());
|
||||||
|
|
||||||
// This must occur before inlining, as the inliner will not look through
|
// This must occur before inlining, as the inliner will not look through
|
||||||
|
|
|
@ -58,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen
|
||||||
AMDGPUMCInstLower.cpp
|
AMDGPUMCInstLower.cpp
|
||||||
AMDGPUOpenCLEnqueuedBlockLowering.cpp
|
AMDGPUOpenCLEnqueuedBlockLowering.cpp
|
||||||
AMDGPUPromoteAlloca.cpp
|
AMDGPUPromoteAlloca.cpp
|
||||||
|
AMDGPUPropagateAttributes.cpp
|
||||||
AMDGPURegAsmNames.inc.cpp
|
AMDGPURegAsmNames.inc.cpp
|
||||||
AMDGPURegisterBankInfo.cpp
|
AMDGPURegisterBankInfo.cpp
|
||||||
AMDGPURegisterInfo.cpp
|
AMDGPURegisterInfo.cpp
|
||||||
|
|
|
@ -0,0 +1,87 @@
|
||||||
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 < %s | FileCheck -check-prefix=OPT %s
|
||||||
|
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=LLC %s
|
||||||
|
|
||||||
|
; OPT: declare void @foo4() local_unnamed_addr #0
|
||||||
|
; OPT: define internal fastcc void @foo3.2() unnamed_addr #1
|
||||||
|
; OPT: define void @foo2() local_unnamed_addr #1
|
||||||
|
; OPT: define internal fastcc void @foo1.1() unnamed_addr #1
|
||||||
|
; OPT: define amdgpu_kernel void @kernel1() local_unnamed_addr #2
|
||||||
|
; OPT: define amdgpu_kernel void @kernel2() local_unnamed_addr #3
|
||||||
|
; OPT: define amdgpu_kernel void @kernel3() local_unnamed_addr #3
|
||||||
|
; OPT: define void @foo1() local_unnamed_addr #4
|
||||||
|
; OPT: define void @foo3() local_unnamed_addr #4
|
||||||
|
; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" }
|
||||||
|
; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,-wavefrontsize32,+wavefrontsize64{{.*}}" }
|
||||||
|
; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32" }
|
||||||
|
; OPT: attributes #3 = { {{.*}} "target-features"="+wavefrontsize64" }
|
||||||
|
; OPT: attributes #4 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" }
|
||||||
|
|
||||||
|
; LLC: foo3:
|
||||||
|
; LLC: sample asm
|
||||||
|
; LLC: foo2:
|
||||||
|
; LLC: sample asm
|
||||||
|
; LLC: foo1:
|
||||||
|
; LLC: foo4@gotpcrel32@lo+4
|
||||||
|
; LLC: foo4@gotpcrel32@hi+4
|
||||||
|
; LLC: foo3@gotpcrel32@lo+4
|
||||||
|
; LLC: foo3@gotpcrel32@hi+4
|
||||||
|
; LLC: foo2@gotpcrel32@lo+4
|
||||||
|
; LLC: foo2@gotpcrel32@hi+4
|
||||||
|
; LLC: foo1@gotpcrel32@lo+4
|
||||||
|
; LLC: foo1@gotpcrel32@hi+4
|
||||||
|
; LLC: kernel1:
|
||||||
|
; LLC: foo1@gotpcrel32@lo+4
|
||||||
|
; LLC: foo1@gotpcrel32@hi+4
|
||||||
|
; LLC: kernel2:
|
||||||
|
; LLC: foo2@gotpcrel32@lo+4
|
||||||
|
; LLC: foo2@gotpcrel32@hi+4
|
||||||
|
; LLC: kernel3:
|
||||||
|
; LLC: foo1@gotpcrel32@lo+4
|
||||||
|
; LLC: foo1@gotpcrel32@hi+4
|
||||||
|
|
||||||
|
declare void @foo4() #1
|
||||||
|
|
||||||
|
define void @foo3() #1 {
|
||||||
|
entry:
|
||||||
|
call void asm sideeffect "; sample asm", ""()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @foo2() #1 {
|
||||||
|
entry:
|
||||||
|
call void asm sideeffect "; sample asm", ""()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @foo1() #1 {
|
||||||
|
entry:
|
||||||
|
tail call void @foo4()
|
||||||
|
tail call void @foo3()
|
||||||
|
tail call void @foo2()
|
||||||
|
tail call void @foo2()
|
||||||
|
tail call void @foo1()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @kernel1() #0 {
|
||||||
|
entry:
|
||||||
|
tail call void @foo1()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @kernel2() #2 {
|
||||||
|
entry:
|
||||||
|
tail call void @foo2()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @kernel3() #3 {
|
||||||
|
entry:
|
||||||
|
tail call void @foo1()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind "target-features"="+wavefrontsize32" }
|
||||||
|
attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" }
|
||||||
|
attributes #2 = { nounwind "target-features"="+wavefrontsize64" }
|
||||||
|
attributes #3 = { nounwind "target-features"="+wavefrontsize64" }
|
|
@ -0,0 +1,72 @@
|
||||||
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 < %s | FileCheck -check-prefix=OPT %s
|
||||||
|
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=LLC %s
|
||||||
|
|
||||||
|
; OPT: declare void @foo4() local_unnamed_addr #0
|
||||||
|
; OPT: define void @foo3() local_unnamed_addr #1
|
||||||
|
; OPT: define void @foo2() local_unnamed_addr #1
|
||||||
|
; OPT: define void @foo1() local_unnamed_addr #1
|
||||||
|
; OPT: define amdgpu_kernel void @kernel1() local_unnamed_addr #2
|
||||||
|
; OPT: define amdgpu_kernel void @kernel2() local_unnamed_addr #2
|
||||||
|
; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" }
|
||||||
|
; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64
|
||||||
|
; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32
|
||||||
|
; OPT: attributes #3 = { nounwind }
|
||||||
|
|
||||||
|
; LLC: foo3:
|
||||||
|
; LLC: sample asm
|
||||||
|
; LLC: foo2:
|
||||||
|
; LLC: sample asm
|
||||||
|
; LLC: foo1:
|
||||||
|
; LLC: foo4@gotpcrel32@lo+4
|
||||||
|
; LLC: foo4@gotpcrel32@hi+4
|
||||||
|
; LLC: foo3@gotpcrel32@lo+4
|
||||||
|
; LLC: foo3@gotpcrel32@hi+4
|
||||||
|
; LLC: foo2@gotpcrel32@lo+4
|
||||||
|
; LLC: foo2@gotpcrel32@hi+4
|
||||||
|
; LLC: foo1@gotpcrel32@lo+4
|
||||||
|
; LLC: foo1@gotpcrel32@hi+4
|
||||||
|
; LLC: kernel1:
|
||||||
|
; LLC: foo1@gotpcrel32@lo+4
|
||||||
|
; LLC: foo1@gotpcrel32@hi+4
|
||||||
|
; LLC: kernel2:
|
||||||
|
; LLC: foo2@gotpcrel32@lo+4
|
||||||
|
; LLC: foo2@gotpcrel32@hi+4
|
||||||
|
|
||||||
|
declare void @foo4() #1
|
||||||
|
|
||||||
|
define void @foo3() #1 {
|
||||||
|
entry:
|
||||||
|
call void asm sideeffect "; sample asm", ""()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @foo2() #1 {
|
||||||
|
entry:
|
||||||
|
call void asm sideeffect "; sample asm", ""()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @foo1() #1 {
|
||||||
|
entry:
|
||||||
|
tail call void @foo4()
|
||||||
|
tail call void @foo3()
|
||||||
|
tail call void @foo2()
|
||||||
|
tail call void @foo2()
|
||||||
|
tail call void @foo1()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @kernel1() #0 {
|
||||||
|
entry:
|
||||||
|
tail call void @foo1()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @kernel2() #0 {
|
||||||
|
entry:
|
||||||
|
tail call void @foo2()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind "target-features"="+wavefrontsize32" }
|
||||||
|
attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" }
|
Loading…
Reference in New Issue