Pass Divergence Analysis data to Selection DAG to drive
divergence-dependent instruction selection.

Differential Revision: https://reviews.llvm.org/D35267
llvm-svn: 326703

commit 2e5eeceeb7 (parent 15186d4938)
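What the patch does, in outline: every SelectionDAG node now carries an IsDivergent bit. The bit is seeded from the IR-level DivergenceAnalysis plus two new target hooks, computed when a node's operands are attached, and kept up to date as the DAG is mutated, so instruction selection can pick different patterns for uniform and divergent values (on AMDGPU: scalar vs. vector memory instructions). The rule itself is small; the sketch below is a standalone model of it, not LLVM code, and every name in it is illustrative:

```cpp
#include <vector>

// Standalone model of the divergence rule wired into the DAG below.
struct Node {
  bool SourceOfDivergence = false; // e.g. reads a per-thread register
  bool AlwaysUniform = false;      // e.g. a readfirstlane-style broadcast
  bool IsDivergent = false;
  std::vector<Node *> Operands;    // data operands only; chains excluded
};

// A node is divergent if it introduces divergence itself or inherits it
// from any data operand, unless the target declares it always uniform.
bool computeDivergence(const Node &N) {
  if (N.AlwaysUniform)
    return false;
  bool Div = N.SourceOfDivergence;
  for (const Node *Op : N.Operands)
    Div = Div || Op->IsDivergent;
  return Div;
}
```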
@@ -13,6 +13,8 @@
 // better decisions.
 //
 //===----------------------------------------------------------------------===//
+#ifndef LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
+#define LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
 
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/IR/Function.h"

@@ -46,3 +48,5 @@ private:
   DenseSet<const Value *> DivergentValues;
 };
 } // End llvm namespace
+
+#endif //LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H

@@ -118,6 +118,17 @@ public:
   /// cross-basic-block values.
   DenseMap<const Value *, unsigned> ValueMap;
 
+  /// VirtReg2Value map is needed by the Divergence Analysis driven
+  /// instruction selection. It is the reverse of ValueMap. It is computed
+  /// in lazy style - on demand. It is used to get the Value corresponding
+  /// to the live-in virtual register and is called from
+  /// TargetLoweringInfo::isSDNodeSourceOfDivergence.
+  DenseMap<unsigned, const Value*> VirtReg2Value;
+
+  /// This method is called from TargetLoweringInfo::isSDNodeSourceOfDivergence
+  /// to get the Value corresponding to the live-in virtual register.
+  const Value *getValueFromVirtualReg(unsigned Vreg);
+
   /// Track virtual registers created for exception pointers.
   DenseMap<const Value *, unsigned> CatchPadExceptionPointers;

@@ -28,8 +28,10 @@
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/CodeGen/DAGCombine.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineValueType.h"

@@ -217,6 +219,9 @@ class SelectionDAG {
   LLVMContext *Context;
   CodeGenOpt::Level OptLevel;
 
+  DivergenceAnalysis *DA = nullptr;
+  FunctionLoweringInfo *FLI = nullptr;
+
   /// The function-level optimization remark emitter. Used to emit remarks
   /// whenever manipulating the DAG.
   OptimizationRemarkEmitter *ORE;

@@ -346,19 +351,7 @@ private:
         .getRawSubclassData();
   }
 
-  void createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
-    assert(!Node->OperandList && "Node already has operands");
-    SDUse *Ops = OperandRecycler.allocate(
-        ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
-
-    for (unsigned I = 0; I != Vals.size(); ++I) {
-      Ops[I].setUser(Node);
-      Ops[I].setInitial(Vals[I]);
-    }
-    Node->NumOperands = Vals.size();
-    Node->OperandList = Ops;
-    checkForCycles(Node);
-  }
+  void createOperands(SDNode *Node, ArrayRef<SDValue> Vals);
 
   void removeOperands(SDNode *Node) {
     if (!Node->OperandList)

@@ -369,7 +362,7 @@ private:
     Node->NumOperands = 0;
     Node->OperandList = nullptr;
   }
-
+  void CreateTopologicalOrder(std::vector<SDNode*>& Order);
 public:
   explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level);
   SelectionDAG(const SelectionDAG &) = delete;

@@ -378,7 +371,12 @@ public:
 
   /// Prepare this SelectionDAG to process code in the given MachineFunction.
   void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE,
-            Pass *PassPtr, const TargetLibraryInfo *LibraryInfo);
+            Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
+            DivergenceAnalysis *DA);
+
+  void setFunctionLoweringInfo(FunctionLoweringInfo *FuncInfo) {
+    FLI = FuncInfo;
+  }
 
   /// Clear state and free memory necessary to make this
   /// SelectionDAG ready to process a new block.

@@ -463,6 +461,8 @@ public:
     return Root;
   }
 
+  void VerifyDAGDiverence();
+
   /// This iterates over the nodes in the SelectionDAG, folding
   /// certain types of nodes together, or eliminating superfluous nodes. The
   /// Level argument controls whether Combine is allowed to produce nodes and

@@ -1128,6 +1128,9 @@ public:
                              SDValue Op3, SDValue Op4, SDValue Op5);
   SDNode *UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops);
 
+  // Propagates the change in divergence to users
+  void updateDivergence(SDNode *N);
+
   /// These are used for target selectors to *mutate* the
   /// specified node to have the specified return type, Target opcode, and
   /// operands. Note that target opcodes are stored as

@@ -466,11 +466,13 @@ protected:
     friend class SDNode;
     friend class MemIntrinsicSDNode;
     friend class MemSDNode;
+    friend class SelectionDAG;
 
     uint16_t HasDebugValue : 1;
     uint16_t IsMemIntrinsic : 1;
+    uint16_t IsDivergent : 1;
   };
-  enum { NumSDNodeBits = 2 };
+  enum { NumSDNodeBits = 3 };
 
   class ConstantSDNodeBitfields {
     friend class ConstantSDNode;

@@ -662,6 +664,8 @@ public:
   bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
   void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
 
+  bool isDivergent() const { return SDNodeBits.IsDivergent; }
+
   /// Return true if there are no uses of this node.
   bool use_empty() const { return UseList == nullptr; }

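The new bit lives in the packed SDNodeBitfields, which is why NumSDNodeBits is bumped from 2 to 3: subclass bitfield classes reserve exactly that many leading bits before declaring their own flags, and an off-by-one here silently overlaps fields. A minimal illustration of the layout contract (not the real SDNode declaration; the assertion holds on common ABIs):

```cpp
#include <cstdint>

// Sketch of why NumSDNodeBits must grow together with the struct.
struct SDNodeBits {
  uint16_t HasDebugValue : 1;
  uint16_t IsMemIntrinsic : 1;
  uint16_t IsDivergent : 1; // the new bit
};
// Subclass bitfields start after the base bits, so the count must match.
enum { NumSDNodeBits = 3 };

struct LoadSDNodeBits {
  uint16_t : NumSDNodeBits; // unnamed field skips the base SDNode bits
  uint16_t IsExpanding : 1; // subclass flag lands in the next bit
};

static_assert(sizeof(SDNodeBits) == sizeof(uint16_t),
              "all flags still fit in one 16-bit word");
```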
@@ -29,6 +29,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/CodeGen/DAGCombine.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineValueType.h"

@@ -2562,6 +2563,16 @@ public:
 
   bool isPositionIndependent() const;
 
+  virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
+                                          FunctionLoweringInfo *FLI,
+                                          DivergenceAnalysis *DA) const {
+    return false;
+  }
+
+  virtual bool isSDNodeAlwaysUniform(const SDNode *N) const {
+    return false;
+  }
+
   /// Returns true by value, base pointer and offset pointer and addressing mode
   /// by reference if the node's address can be legally represented as
   /// pre-indexed load / store address.

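Both hooks default to false, so targets without branch divergence are unaffected: nothing introduces divergence and nothing is forced uniform, leaving every node's bit at its operand-derived value (initially false). A standalone sketch of the override pattern, with illustrative names rather than LLVM's classes (the AMDGPU code later in this patch is the real example):

```cpp
#include <cstdio>

struct Node { int Opcode; };
enum { OP_ENTRY = 0, OP_THREAD_ID = 1, OP_ADD = 2 };

struct TargetLoweringBase {
  // Conservative defaults: nothing is divergent, nothing forced uniform.
  virtual bool isNodeSourceOfDivergence(const Node *) const { return false; }
  virtual bool isNodeAlwaysUniform(const Node *) const { return false; }
  virtual ~TargetLoweringBase() = default;
};

struct GPUTargetLowering : TargetLoweringBase {
  bool isNodeSourceOfDivergence(const Node *N) const override {
    return N->Opcode == OP_THREAD_ID; // a per-lane value
  }
  bool isNodeAlwaysUniform(const Node *N) const override {
    return N->Opcode == OP_ENTRY; // a control token, never per-lane
  }
};

int main() {
  GPUTargetLowering TLI;
  Node Tid{OP_THREAD_ID};
  std::printf("thread-id divergent: %d\n", TLI.isNodeSourceOfDivergence(&Tid));
}
```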
@@ -547,3 +547,13 @@ FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const
   }
   return std::make_pair(It->second, false);
 }
+
+const Value *
+FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
+  if (VirtReg2Value.empty()) {
+    for (auto &P : ValueMap) {
+      VirtReg2Value[P.second] = P.first;
+    }
+  }
+  return VirtReg2Value[Vreg];
+}

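The inverse map is built once, on the first query, by walking ValueMap; every later query is a plain lookup. This assumes the forward map is stable and effectively one-to-one by the time isSDNodeSourceOfDivergence starts asking, which holds during instruction selection. The same idiom in standalone form (standard containers here, not LLVM's DenseMap):

```cpp
#include <cstdio>
#include <string>
#include <unordered_map>

std::unordered_map<std::string, unsigned> ValueToReg = {
    {"%a", 5}, {"%b", 6}};
std::unordered_map<unsigned, std::string> RegToValue; // built on demand

const std::string &valueForReg(unsigned Reg) {
  if (RegToValue.empty())          // invert once, on the first query
    for (auto &P : ValueToReg)
      RegToValue[P.second] = P.first;
  return RegToValue[Reg];          // default-constructs for unknown regs
}

int main() { std::printf("vreg 6 -> %s\n", valueForReg(6).c_str()); }
```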
@@ -950,7 +950,8 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
 
 void SelectionDAG::init(MachineFunction &NewMF,
                         OptimizationRemarkEmitter &NewORE,
-                        Pass *PassPtr, const TargetLibraryInfo *LibraryInfo) {
+                        Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
+                        DivergenceAnalysis *Divergence) {
   MF = &NewMF;
   SDAGISelPass = PassPtr;
   ORE = &NewORE;

@@ -958,6 +959,7 @@ void SelectionDAG::init(MachineFunction &NewMF,
   TSI = getSubtarget().getSelectionDAGInfo();
   LibInfo = LibraryInfo;
   Context = &MF->getFunction().getContext();
+  DA = Divergence;
 }
 
 SelectionDAG::~SelectionDAG() {

@@ -1713,6 +1715,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
     return SDValue(E, 0);
 
   auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
+  N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
   return SDValue(N, 0);

@@ -6699,6 +6702,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
   if (N->OperandList[1] != Op2)
     N->OperandList[1].set(Op2);
 
+  updateDivergence(N);
   // If this gets put into a CSE map, add it.
   if (InsertPos) CSEMap.InsertNode(N, InsertPos);
   return N;

@@ -7340,8 +7344,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
       SDUse &Use = UI.getUse();
       ++UI;
       Use.set(To);
+      if (To->isDivergent() != From->isDivergent())
+        updateDivergence(User);
     } while (UI != UE && *UI == User);
-
     // Now that we have modified User, add it back to the CSE maps. If it
     // already exists there, recursively merge the results together.
     AddModifiedNodeToCSEMaps(User);

@@ -7395,6 +7400,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
       SDUse &Use = UI.getUse();
      ++UI;
       Use.setNode(To);
+      if (To->isDivergent() != From->isDivergent())
+        updateDivergence(User);
     } while (UI != UE && *UI == User);
 
     // Now that we have modified User, add it back to the CSE maps. If it

@@ -7439,8 +7446,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
       const SDValue &ToOp = To[Use.getResNo()];
       ++UI;
       Use.set(ToOp);
+      if (To->getNode()->isDivergent() != From->isDivergent())
+        updateDivergence(User);
     } while (UI != UE && *UI == User);
-
     // Now that we have modified User, add it back to the CSE maps. If it
     // already exists there, recursively merge the results together.
     AddModifiedNodeToCSEMaps(User);

@@ -7498,8 +7506,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
 
     ++UI;
     Use.set(To);
+    if (To->isDivergent() != From->isDivergent())
+      updateDivergence(User);
   } while (UI != UE && *UI == User);
-
   // We are iterating over all uses of the From node, so if a use
   // doesn't use the specific value, no changes are made.
   if (!UserRemovedFromCSEMaps)

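All of the ReplaceAllUsesWith variants grow the same two lines: a user's bit is derived from its operands, so it only needs to be recomputed when the replacement node's divergence differs from that of the node it replaces. The guard in miniature, as a standalone sketch reusing the illustrative Node type from earlier:

```cpp
struct Node { bool IsDivergent = false; };

// Re-deriving a user's bit is only necessary when the swap can actually
// change what the user sees (illustrative callback, not LLVM's API).
void replaceUse(Node *&UseSlot, Node *From, Node *To,
                void (*updateDivergence)(Node *), Node *User) {
  UseSlot = To;
  if (To->IsDivergent != From->IsDivergent)
    updateDivergence(User); // an operand changed in a way that may flip the bit
}
```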
@@ -7532,6 +7541,70 @@ namespace {
 
 } // end anonymous namespace
 
+void SelectionDAG::updateDivergence(SDNode *N)
+{
+  if (TLI->isSDNodeAlwaysUniform(N))
+    return;
+  bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+  for (auto &Op : N->ops()) {
+    if (Op.Val.getValueType() != MVT::Other)
+      IsDivergent |= Op.getNode()->isDivergent();
+  }
+  if (N->SDNodeBits.IsDivergent != IsDivergent) {
+    N->SDNodeBits.IsDivergent = IsDivergent;
+    for (auto U : N->uses()) {
+      updateDivergence(U);
+    }
+  }
+}
+
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
+  DenseMap<SDNode *, unsigned> Degree;
+  Order.reserve(AllNodes.size());
+  for (auto &N : allnodes()) {
+    unsigned NOps = N.getNumOperands();
+    Degree[&N] = NOps;
+    if (0 == NOps)
+      Order.push_back(&N);
+  }
+  for (std::vector<SDNode *>::iterator I = Order.begin();
+       I != Order.end(); ++I) {
+    SDNode *N = *I;
+    for (auto U : N->uses()) {
+      unsigned &UnsortedOps = Degree[U];
+      if (0 == --UnsortedOps)
+        Order.push_back(U);
+    }
+  }
+}
+
+void SelectionDAG::VerifyDAGDiverence()
+{
+  std::vector<SDNode*> TopoOrder;
+  CreateTopologicalOrder(TopoOrder);
+  const TargetLowering &TLI = getTargetLoweringInfo();
+  DenseMap<const SDNode *, bool> DivergenceMap;
+  for (auto &N : allnodes()) {
+    DivergenceMap[&N] = false;
+  }
+  for (auto N : TopoOrder) {
+    bool IsDivergent = DivergenceMap[N];
+    bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA);
+    for (auto &Op : N->ops()) {
+      if (Op.Val.getValueType() != MVT::Other)
+        IsSDNodeDivergent |= DivergenceMap[Op.getNode()];
+    }
+    if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) {
+      DivergenceMap[N] = true;
+    }
+  }
+  for (auto &N : allnodes()) {
+    assert(DivergenceMap[&N] == N.isDivergent() &&
+           "Divergence bit inconsistency detected\n");
+  }
+}
+
 /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
 /// uses of other values produced by From.getNode() alone. The same value
 /// may appear in both the From and To list. The Deleted vector is

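CreateTopologicalOrder is Kahn's algorithm with operands as the incoming edges: operand-less nodes seed the order, and each user is appended once its last pending operand has been emitted. VerifyDAGDiverence then recomputes divergence forward over that order and asserts the incremental bits agree; note that updateDivergence above recurses through users, so a single flip can in the worst case visit every transitive user once. A runnable standalone sketch of the ordering, with illustrative types:

```cpp
#include <cstdio>
#include <unordered_map>
#include <vector>

struct Node {
  const char *Name;
  std::vector<Node *> Users;  // edges: node -> user
  unsigned NumOperands = 0;
};

std::vector<Node *> topologicalOrder(std::vector<Node *> &All) {
  std::unordered_map<Node *, unsigned> Pending; // operands not yet emitted
  std::vector<Node *> Order;
  Order.reserve(All.size());
  for (Node *N : All) {
    Pending[N] = N->NumOperands;
    if (N->NumOperands == 0)  // leaves (constants, registers) go first
      Order.push_back(N);
  }
  // Order doubles as the worklist; nodes are appended as they become ready.
  for (size_t I = 0; I != Order.size(); ++I)
    for (Node *U : Order[I]->Users)
      if (--Pending[U] == 0)
        Order.push_back(U);
  return Order;
}

int main() {
  Node A{"reg"}, B{"add"}, C{"store"};
  A.Users = {&B}; B.Users = {&C};
  B.NumOperands = 1; C.NumOperands = 1;
  std::vector<Node *> All{&C, &B, &A};
  for (Node *N : topologicalOrder(All))
    std::printf("%s\n", N->Name); // prints: reg, add, store
}
```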
@@ -8337,6 +8410,26 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
   return nullptr;
 }
 
+void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
+  assert(!Node->OperandList && "Node already has operands");
+  SDUse *Ops = OperandRecycler.allocate(
+      ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
+
+  bool IsDivergent = false;
+  for (unsigned I = 0; I != Vals.size(); ++I) {
+    Ops[I].setUser(Node);
+    Ops[I].setInitial(Vals[I]);
+    if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence.
+      IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent();
+  }
+  Node->NumOperands = Vals.size();
+  Node->OperandList = Ops;
+  IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
+  if (!TLI->isSDNodeAlwaysUniform(Node))
+    Node->SDNodeBits.IsDivergent = IsDivergent;
+  checkForCycles(Node);
+}
+
 #ifndef NDEBUG
 static void checkForCyclesHelper(const SDNode *N,
                                  SmallPtrSetImpl<const SDNode*> &Visited,

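This is why the inline header definition above was moved out of line: createOperands now computes the initial bit the moment a node gets its operands. It ORs the divergence of every data operand, skips chains (MVT::Other values order side effects and carry no data), then folds in the target's source-of-divergence verdict, unless the node is always uniform. The operand scan in miniature, with illustrative types:

```cpp
#include <vector>

struct Val { bool IsDivergent; bool IsChain; };

// Chains carry ordering, not data, so they never contribute divergence.
bool operandDivergence(const std::vector<Val> &Ops) {
  bool Div = false;
  for (const Val &Op : Ops)
    if (!Op.IsChain)           // skip MVT::Other operands
      Div = Div || Op.IsDivergent;
  return Div;
}
```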
@@ -629,6 +629,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
 
   if (getNodeId() != -1)
     OS << " [ID=" << getNodeId() << ']';
+  if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this))))
+    OS << "# D:" << isDivergent();
 
   if (!G)
     return;

@@ -29,6 +29,7 @@
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GCMetadata.h"

@@ -329,6 +330,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<StackProtector>();
   AU.addPreserved<GCModuleInfo>();
   AU.addRequired<TargetLibraryInfoWrapperPass>();
+  AU.addRequired<TargetTransformInfoWrapperPass>();
   if (UseMBPI && OptLevel != CodeGenOpt::None)
     AU.addRequired<BranchProbabilityInfoWrapperPass>();
   MachineFunctionPass::getAnalysisUsage(AU);

@@ -414,7 +416,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
 
   SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
 
-  CurDAG->init(*MF, *ORE, this, LibInfo);
+  CurDAG->init(*MF, *ORE, this, LibInfo,
+               getAnalysisIfAvailable<DivergenceAnalysis>());
   FuncInfo->set(Fn, *MF, CurDAG);
 
   // Now get the optional analyzes if we want to.

@@ -711,6 +714,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   int BlockNumber = -1;
   (void)BlockNumber;
   bool MatchFilterBB = false; (void)MatchFilterBB;
+  TargetTransformInfo &TTI =
+      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn);
 
   // Pre-type legalization allow creation of any node types.
   CurDAG->NewNodesMustHaveLegalTypes = false;

@@ -744,6 +749,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
   }
 
+  if (TTI.hasBranchDivergence())
+    CurDAG->VerifyDAGDiverence();
+
   DEBUG(dbgs() << "Optimized lowered selection DAG: "
                << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
                << "'\n";

@@ -761,6 +769,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     Changed = CurDAG->LegalizeTypes();
   }
 
+  if (TTI.hasBranchDivergence())
+    CurDAG->VerifyDAGDiverence();
+
   DEBUG(dbgs() << "Type-legalized selection DAG: "
                << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
                << "'\n";

@@ -780,6 +791,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
       CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
     }
 
+    if (TTI.hasBranchDivergence())
+      CurDAG->VerifyDAGDiverence();
+
     DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
                  << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
                  << "'\n";

@@ -823,6 +837,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
                  << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
                  << "'\n";
           CurDAG->dump());
+
+    if (TTI.hasBranchDivergence())
+      CurDAG->VerifyDAGDiverence();
   }
 
   if (ViewLegalizeDAGs && MatchFilterBB)

@@ -834,6 +851,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->Legalize();
   }
 
+  if (TTI.hasBranchDivergence())
+    CurDAG->VerifyDAGDiverence();
+
   DEBUG(dbgs() << "Legalized selection DAG: "
                << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
                << "'\n";

@@ -849,6 +869,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
   }
 
+  if (TTI.hasBranchDivergence())
+    CurDAG->VerifyDAGDiverence();
+
   DEBUG(dbgs() << "Optimized legalized selection DAG: "
                << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
                << "'\n";

@@ -1401,6 +1424,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];
   FuncInfo->InsertPt = FuncInfo->MBB->begin();
 
+  CurDAG->setFunctionLoweringInfo(FuncInfo);
+
   if (!FastIS) {
     LowerArguments(Fn);
   } else {

@@ -27,6 +27,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/ISDOpcodes.h"

@@ -83,6 +84,7 @@ public:
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AMDGPUArgumentUsageInfo>();
+    AU.addRequired<DivergenceAnalysis>();
     SelectionDAGISel::getAnalysisUsage(AU);
   }

@@ -25,6 +25,7 @@
 #include "AMDGPURegisterInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
+#include "Utils/AMDGPUBaseInfo.h"
 #include "R600MachineFunctionInfo.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"

@@ -748,6 +749,101 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
   return true;
 }
 
+bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
+  switch (N->getOpcode()) {
+  default:
+    return false;
+  case ISD::EntryToken:
+  case ISD::TokenFactor:
+    return true;
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+    switch (IntrID) {
+    default:
+      return false;
+    case Intrinsic::amdgcn_readfirstlane:
+    case Intrinsic::amdgcn_readlane:
+      return true;
+    }
+  }
+  case ISD::LOAD: {
+    const LoadSDNode *L = dyn_cast<LoadSDNode>(N);
+    if (L->getMemOperand()->getAddrSpace() ==
+        Subtarget->getAMDGPUAS().CONSTANT_ADDRESS_32BIT)
+      return true;
+    return false;
+  }
+  }
+}
+
+bool AMDGPUTargetLowering::isSDNodeSourceOfDivergence(const SDNode *N,
+    FunctionLoweringInfo *FLI, DivergenceAnalysis *DA) const {
+  switch (N->getOpcode()) {
+  case ISD::Register:
+  case ISD::CopyFromReg: {
+    const RegisterSDNode *R = nullptr;
+    if (N->getOpcode() == ISD::Register) {
+      R = dyn_cast<RegisterSDNode>(N);
+    } else {
+      R = dyn_cast<RegisterSDNode>(N->getOperand(1));
+    }
+    if (R) {
+      const MachineFunction *MF = FLI->MF;
+      const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+      const MachineRegisterInfo &MRI = MF->getRegInfo();
+      const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
+      unsigned Reg = R->getReg();
+      if (TRI.isPhysicalRegister(Reg))
+        return TRI.isVGPR(MRI, Reg);
+
+      if (MRI.isLiveIn(Reg)) {
+        // workitem.id.x workitem.id.y workitem.id.z
+        if ((MRI.getLiveInPhysReg(Reg) == AMDGPU::T0_X) ||
+            (MRI.getLiveInPhysReg(Reg) == AMDGPU::T0_Y) ||
+            (MRI.getLiveInPhysReg(Reg) == AMDGPU::T0_Z) ||
+            (MRI.getLiveInPhysReg(Reg) == AMDGPU::VGPR0) ||
+            (MRI.getLiveInPhysReg(Reg) == AMDGPU::VGPR1) ||
+            (MRI.getLiveInPhysReg(Reg) == AMDGPU::VGPR2))
+          return true;
+        // Formal arguments of non-entry functions
+        // are conservatively considered divergent
+        else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
+          return true;
+      }
+      return !DA || DA->isDivergent(FLI->getValueFromVirtualReg(Reg));
+    }
+  } break;
+  case ISD::LOAD: {
+    const LoadSDNode *L = dyn_cast<LoadSDNode>(N);
+    if (L->getMemOperand()->getAddrSpace() ==
+        Subtarget->getAMDGPUAS().PRIVATE_ADDRESS)
+      return true;
+  } break;
+  case ISD::CALLSEQ_END:
+    return true;
+  case ISD::INTRINSIC_WO_CHAIN:
+    return AMDGPU::isIntrinsicSourceOfDivergence(
+        cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
+  case ISD::INTRINSIC_W_CHAIN:
+    return AMDGPU::isIntrinsicSourceOfDivergence(
+        cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+  }
+  return false;
+}
+
 //===---------------------------------------------------------------------===//
 // Target Properties
 //===---------------------------------------------------------------------===//

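The AMDGPU classification reads roughly as follows: copies from physical VGPRs are divergent (per-lane registers), as are live-ins holding workitem ids (T0_X/Y/Z or VGPR0-2), formal arguments of non-entry functions (conservatively), private-address loads (per-lane scratch), CALLSEQ_END (call results), and the intrinsics in the shared table; readfirstlane/readlane and 32-bit constant-address loads are always uniform; everything else defers to DivergenceAnalysis via getValueFromVirtualReg. A distilled decision table, using an illustrative enum rather than the real opcode and intrinsic sets:

```cpp
#include <cstdio>

enum NodeKind {
  VGPRCopy,       // copy from a vector (per-lane) register
  PrivateLoad,    // load from the per-lane scratch/private address space
  CallResult,     // CALLSEQ_END: results may differ per lane
  ReadFirstLane,  // cross-lane broadcast: uniform by construction
  ConstantLoad32, // 32-bit constant address space: uniform
  PlainALU
};

bool sourceOfDivergence(NodeKind K) {
  return K == VGPRCopy || K == PrivateLoad || K == CallResult;
}
bool alwaysUniform(NodeKind K) {
  return K == ReadFirstLane || K == ConstantLoad32;
}

int main() {
  std::printf("private load divergent: %d\n", sourceOfDivergence(PrivateLoad));
  std::printf("readfirstlane uniform:  %d\n", alwaysUniform(ReadFirstLane));
}
```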
@@ -168,6 +168,9 @@ public:
   bool isCheapToSpeculateCttz() const override;
   bool isCheapToSpeculateCtlz() const override;
 
+  bool isSDNodeSourceOfDivergence(const SDNode *N,
+      FunctionLoweringInfo *FLI, DivergenceAnalysis *DA) const;
+  bool isSDNodeAlwaysUniform(const SDNode *N) const;
   static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
   static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);

@@ -17,6 +17,7 @@
 
 #include "AMDGPUTargetTransformInfo.h"
 #include "AMDGPUSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"

@@ -464,55 +465,7 @@ int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
   }
 }
 
-static bool isIntrinsicSourceOfDivergence(const IntrinsicInst *I) {
-  switch (I->getIntrinsicID()) {
-  case Intrinsic::amdgcn_workitem_id_x:
-  case Intrinsic::amdgcn_workitem_id_y:
-  case Intrinsic::amdgcn_workitem_id_z:
-  case Intrinsic::amdgcn_interp_mov:
-  case Intrinsic::amdgcn_interp_p1:
-  case Intrinsic::amdgcn_interp_p2:
-  case Intrinsic::amdgcn_mbcnt_hi:
-  case Intrinsic::amdgcn_mbcnt_lo:
-  case Intrinsic::r600_read_tidig_x:
-  case Intrinsic::r600_read_tidig_y:
-  case Intrinsic::r600_read_tidig_z:
-  case Intrinsic::amdgcn_atomic_inc:
-  case Intrinsic::amdgcn_atomic_dec:
-  case Intrinsic::amdgcn_ds_fadd:
-  case Intrinsic::amdgcn_ds_fmin:
-  case Intrinsic::amdgcn_ds_fmax:
-  case Intrinsic::amdgcn_image_atomic_swap:
-  case Intrinsic::amdgcn_image_atomic_add:
-  case Intrinsic::amdgcn_image_atomic_sub:
-  case Intrinsic::amdgcn_image_atomic_smin:
-  case Intrinsic::amdgcn_image_atomic_umin:
-  case Intrinsic::amdgcn_image_atomic_smax:
-  case Intrinsic::amdgcn_image_atomic_umax:
-  case Intrinsic::amdgcn_image_atomic_and:
-  case Intrinsic::amdgcn_image_atomic_or:
-  case Intrinsic::amdgcn_image_atomic_xor:
-  case Intrinsic::amdgcn_image_atomic_inc:
-  case Intrinsic::amdgcn_image_atomic_dec:
-  case Intrinsic::amdgcn_image_atomic_cmpswap:
-  case Intrinsic::amdgcn_buffer_atomic_swap:
-  case Intrinsic::amdgcn_buffer_atomic_add:
-  case Intrinsic::amdgcn_buffer_atomic_sub:
-  case Intrinsic::amdgcn_buffer_atomic_smin:
-  case Intrinsic::amdgcn_buffer_atomic_umin:
-  case Intrinsic::amdgcn_buffer_atomic_smax:
-  case Intrinsic::amdgcn_buffer_atomic_umax:
-  case Intrinsic::amdgcn_buffer_atomic_and:
-  case Intrinsic::amdgcn_buffer_atomic_or:
-  case Intrinsic::amdgcn_buffer_atomic_xor:
-  case Intrinsic::amdgcn_buffer_atomic_cmpswap:
-  case Intrinsic::amdgcn_ps_live:
-  case Intrinsic::amdgcn_ds_swizzle:
-    return true;
-  default:
-    return false;
-  }
-}
-
 static bool isArgPassedInSGPR(const Argument *A) {
   const Function *F = A->getParent();

@@ -563,7 +516,7 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
     return true;
 
   if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V))
-    return isIntrinsicSourceOfDivergence(Intrinsic);
+    return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID());
 
   // Assume all function calls are a source of divergence.
   if (isa<CallInst>(V) || isa<InvokeInst>(V))

@@ -5372,7 +5372,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   unsigned NumElements = MemVT.getVectorNumElements();
   if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
       AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
-    if (isMemOpUniform(Load))
+    if (!Op->isDivergent())
       return SDValue();
     // Non-uniform loads will be selected to MUBUF instructions, so they
     // have the same legalization requirements as global and private

@@ -5382,7 +5382,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
       AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
       AS == AMDGPUASI.GLOBAL_ADDRESS) {
-    if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
+    if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
         !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load))
       return SDValue();
     // Non-uniform loads will be selected to MUBUF instructions, so they

@@ -223,12 +223,9 @@ def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>
 def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
   auto Ld = cast<LoadSDNode>(N);
   return Ld->getAlignment() >= 4 &&
-    (((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
-       Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
-      static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
+    ((((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) || (Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT)) && !N->isDivergent()) ||
     (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
-     !Ld->isVolatile() &&
-     static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
+     !Ld->isVolatile() && !N->isDivergent() &&
      static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
 }]>;

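This PatFrag is where the new bit pays off: smrd_load previously asked the lowering for isMemOpUniform(N), and now simply tests !N->isDivergent(), so uniform loads keep selecting to scalar SMRD (s_load) instructions while divergent ones fall through to the vector MUBUF/flat path. A model of the resulting predicate, with illustrative field names:

```cpp
// Sketch of the selection predicate: a load may take the scalar (SMRD)
// path only when the DAG says it is uniform. Field names are illustrative.
struct LoadInfo {
  bool ConstantAddrSpace; // constant or constant-32bit address space
  bool GlobalAddrSpace;
  bool ScalarizeGlobal;   // subtarget opt-in for global loads
  bool Volatile;
  bool NoClobber;         // memory not clobbered between def and use
  bool Divergent;         // N->isDivergent() after this patch
  unsigned Align;
};

bool selectsToScalarLoad(const LoadInfo &L) {
  if (L.Align < 4 || L.Divergent)
    return false;
  return L.ConstantAddrSpace ||
         (L.ScalarizeGlobal && L.GlobalAddrSpace && !L.Volatile && L.NoClobber);
}
```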
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUBaseInfo.h"
+#include "AMDGPUTargetTransformInfo.h"
 #include "AMDGPU.h"
 #include "SIDefines.h"
 #include "llvm/ADT/StringRef.h"

@@ -938,5 +939,55 @@ AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
 AMDGPUAS getAMDGPUAS(const Module &M) {
   return getAMDGPUAS(Triple(M.getTargetTriple()));
 }
+
+bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
+  switch (IntrID) {
+  case Intrinsic::amdgcn_workitem_id_x:
+  case Intrinsic::amdgcn_workitem_id_y:
+  case Intrinsic::amdgcn_workitem_id_z:
+  case Intrinsic::amdgcn_interp_mov:
+  case Intrinsic::amdgcn_interp_p1:
+  case Intrinsic::amdgcn_interp_p2:
+  case Intrinsic::amdgcn_mbcnt_hi:
+  case Intrinsic::amdgcn_mbcnt_lo:
+  case Intrinsic::r600_read_tidig_x:
+  case Intrinsic::r600_read_tidig_y:
+  case Intrinsic::r600_read_tidig_z:
+  case Intrinsic::amdgcn_atomic_inc:
+  case Intrinsic::amdgcn_atomic_dec:
+  case Intrinsic::amdgcn_ds_fadd:
+  case Intrinsic::amdgcn_ds_fmin:
+  case Intrinsic::amdgcn_ds_fmax:
+  case Intrinsic::amdgcn_image_atomic_swap:
+  case Intrinsic::amdgcn_image_atomic_add:
+  case Intrinsic::amdgcn_image_atomic_sub:
+  case Intrinsic::amdgcn_image_atomic_smin:
+  case Intrinsic::amdgcn_image_atomic_umin:
+  case Intrinsic::amdgcn_image_atomic_smax:
+  case Intrinsic::amdgcn_image_atomic_umax:
+  case Intrinsic::amdgcn_image_atomic_and:
+  case Intrinsic::amdgcn_image_atomic_or:
+  case Intrinsic::amdgcn_image_atomic_xor:
+  case Intrinsic::amdgcn_image_atomic_inc:
+  case Intrinsic::amdgcn_image_atomic_dec:
+  case Intrinsic::amdgcn_image_atomic_cmpswap:
+  case Intrinsic::amdgcn_buffer_atomic_swap:
+  case Intrinsic::amdgcn_buffer_atomic_add:
+  case Intrinsic::amdgcn_buffer_atomic_sub:
+  case Intrinsic::amdgcn_buffer_atomic_smin:
+  case Intrinsic::amdgcn_buffer_atomic_umin:
+  case Intrinsic::amdgcn_buffer_atomic_smax:
+  case Intrinsic::amdgcn_buffer_atomic_umax:
+  case Intrinsic::amdgcn_buffer_atomic_and:
+  case Intrinsic::amdgcn_buffer_atomic_or:
+  case Intrinsic::amdgcn_buffer_atomic_xor:
+  case Intrinsic::amdgcn_buffer_atomic_cmpswap:
+  case Intrinsic::amdgcn_ps_live:
+  case Intrinsic::amdgcn_ds_swizzle:
+    return true;
+  default:
+    return false;
+  }
+}
 } // namespace AMDGPU
 } // namespace llvm

@@ -382,6 +382,9 @@ int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
 /// not the encoded offset.
 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
 
+/// \returns true if the intrinsic is divergent
+bool isIntrinsicSourceOfDivergence(unsigned IntrID);
+
 } // end namespace AMDGPU
 } // end namespace llvm

@@ -2,7 +2,9 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
 ; GCN-LABEL: {{^}}use_dispatch_ptr:
-; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
 define void @use_dispatch_ptr() #1 {
   %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
   %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*

@@ -19,7 +21,9 @@ define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
 }
 
 ; GCN-LABEL: {{^}}use_queue_ptr:
-; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
 define void @use_queue_ptr() #1 {
   %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
   %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*

@@ -37,11 +41,12 @@ define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
 }
 
 ; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
-; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[6:7], 0x10
+; CIVI: flat_load_dword v[[HI:[0-9]+]], v[0:1]
 ; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
+; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
-; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
+; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
+; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
+; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
 define void @use_queue_ptr_addrspacecast() #1 {
   %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
   store volatile i32 0, i32* %asc

@@ -60,7 +65,9 @@ define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
 }
 
 ; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
-; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
 define void @use_kernarg_segment_ptr() #1 {
   %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
   %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*

@@ -424,9 +431,15 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
 
 ; GCN-LABEL: {{^}}use_every_sgpr_input:
 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
-; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
-; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
-; GCN: s_load_dword s{{[0-9]+}}, s[10:11], 0x0
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s8
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s9
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s10
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s11
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
 ; GCN: ; use s[12:13]
 ; GCN: ; use s14
 ; GCN: ; use s15

@@ -557,12 +570,23 @@ define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
 ; GCN-DAG: s_mov_b32 s6, s14
 ; GCN-DAG: s_mov_b32 s7, s15
 ; GCN-DAG: s_mov_b32 s8, s16
 
+; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[6:7]
+; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[8:9]
+; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[10:11]
+
 ; GCN: s_swappc_b64
 
 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
-; GCN: s_load_dword s{{[0-9]+}},
-; GCN: s_load_dword s{{[0-9]+}},
-; GCN: s_load_dword s{{[0-9]+}},
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_X]]
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_X]]
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Y]]
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Y]]
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Z]]
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Z]]
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
 ; GCN: ; use
 ; GCN: ; use [[SAVE_X]]
 ; GCN: ; use [[SAVE_Y]]

@@ -34,7 +34,13 @@ define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
 
 ; GCN-LABEL: {{^}}func_implicitarg_ptr:
 ; GCN: s_waitcnt
-; GCN-NEXT: s_load_dword s{{[0-9]+}}, s[6:7], 0x0{{$}}
+; MESA: s_mov_b64 s[8:9], s[6:7]
+; MESA: s_mov_b32 s11, 0xf000
+; MESA: s_mov_b32 s10, -1
+; MESA: buffer_load_dword v0, off, s[8:11], 0
+; HSA: v_mov_b32_e32 v0, s6
+; HSA: v_mov_b32_e32 v1, s7
+; HSA: flat_load_dword v0, v[0:1]
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @func_implicitarg_ptr() #1 {

@@ -83,8 +89,21 @@ define void @func_call_implicitarg_ptr_func() #1 {
 
 ; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
 ; GCN: s_waitcnt
-; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0{{$}}
-; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0{{$}}
+; MESA: s_mov_b64 s[12:13], s[6:7]
+; MESA: s_mov_b32 s15, 0xf000
+; MESA: s_mov_b32 s14, -1
+; MESA: buffer_load_dword v0, off, s[12:15], 0
+; HSA: v_mov_b32_e32 v0, s6
+; HSA: v_mov_b32_e32 v1, s7
+; HSA: flat_load_dword v0, v[0:1]
+; MESA: s_mov_b32 s10, s14
+; MESA: s_mov_b32 s11, s15
+; MESA: buffer_load_dword v0, off, s[8:11], 0
+; HSA: v_mov_b32_e32 v0, s8
+; HSA: v_mov_b32_e32 v1, s9
+; HSA: flat_load_dword v0, v[0:1]
+
+; GCN: s_waitcnt vmcnt(0)
 define void @func_kernarg_implicitarg_ptr() #1 {
   %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()