forked from OSchip/llvm-project
AMDGPU: Split AMDGPUTTI into GCNTTI and R600TTI
Reviewers: arsenm, nhaehnle
Reviewed By: arsenm
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D47359
llvm-svn: 333605
This commit is contained in:
parent
178fdb1a3b
commit
c7624317d7
|
@ -439,6 +439,11 @@ const R600Subtarget *R600TargetMachine::getSubtargetImpl(
|
|||
return I.get();
|
||||
}
|
||||
|
||||
// Returns a TargetTransformInfo for function F, wrapping an R600TTIImpl
// constructed from this target machine and F.
TargetTransformInfo
|
||||
R600TargetMachine::getTargetTransformInfo(const Function &F) {
|
||||
return TargetTransformInfo(R600TTIImpl(this, F));
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GCN Target Machine (SI+)
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -472,6 +477,11 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
|
|||
return I.get();
|
||||
}
|
||||
|
||||
// Returns a TargetTransformInfo for function F, wrapping a GCNTTIImpl
// constructed from this target machine and F.
TargetTransformInfo
|
||||
GCNTargetMachine::getTargetTransformInfo(const Function &F) {
|
||||
return TargetTransformInfo(GCNTTIImpl(this, F));
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPU Pass Setup
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -561,11 +571,6 @@ public:
|
|||
|
||||
} // end anonymous namespace
|
||||
|
||||
// Returns a TargetTransformInfo for function F, wrapping an AMDGPUTTIImpl
// constructed from this target machine and F.
// NOTE(review): the hunk header (-561,11 +571,6) suggests this definition is
// the one being removed by the commit — confirm against the raw diff.
TargetTransformInfo
|
||||
AMDGPUTargetMachine::getTargetTransformInfo(const Function &F) {
|
||||
return TargetTransformInfo(AMDGPUTTIImpl(this, F));
|
||||
}
|
||||
|
||||
void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
|
||||
if (getOptLevel() == CodeGenOpt::Aggressive)
|
||||
addPass(createGVNPass());
|
||||
|
|
|
@ -55,7 +55,6 @@ public:
|
|||
/// Returns a pointer to this target machine's IntrinsicInfo member.
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override { return &IntrinsicInfo; }
|
||||
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
|
||||
|
||||
TargetLoweringObjectFile *getObjFileLowering() const override {
|
||||
return TLOF.get();
|
||||
|
@ -91,6 +90,8 @@ public:
|
|||
|
||||
const R600Subtarget *getSubtargetImpl(const Function &) const override;
|
||||
|
||||
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
|
||||
|
||||
/// Always answers false for this target machine.
bool isMachineVerifierClean() const override { return false; }
|
||||
|
@ -114,6 +115,8 @@ public:
|
|||
|
||||
const SISubtarget *getSubtargetImpl(const Function &) const override;
|
||||
|
||||
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
|
||||
|
||||
/// Always answers true for this target machine.
bool useIPRA() const override { return true; }
|
||||
|
|
|
@ -211,32 +211,27 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|||
}
|
||||
}
|
||||
|
||||
unsigned AMDGPUTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
|
||||
unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
|
||||
// The concept of vector registers doesn't really exist. Some packed vector
|
||||
// operations operate on the normal 32-bit registers.
|
||||
|
||||
// Number of VGPRs on SI.
|
||||
if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
|
||||
return 256;
|
||||
|
||||
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
|
||||
return 256;
|
||||
}
|
||||
|
||||
unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) const {
|
||||
unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const {
|
||||
// This is really the number of registers to fill when vectorizing /
|
||||
// interleaving loops, so we lie to avoid trying to use all registers.
|
||||
return getHardwareNumberOfRegisters(Vec) >> 3;
|
||||
}
|
||||
|
||||
unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) const {
|
||||
unsigned GCNTTIImpl::getRegisterBitWidth(bool Vector) const {
|
||||
return 32;
|
||||
}
|
||||
|
||||
unsigned AMDGPUTTIImpl::getMinVectorRegisterBitWidth() const {
|
||||
unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const {
|
||||
return 32;
|
||||
}
|
||||
|
||||
unsigned AMDGPUTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
|
||||
unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
|
||||
unsigned ChainSizeInBytes,
|
||||
VectorType *VecTy) const {
|
||||
unsigned VecRegBitWidth = VF * LoadSize;
|
||||
|
@ -247,7 +242,7 @@ unsigned AMDGPUTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
|
|||
return VF;
|
||||
}
|
||||
|
||||
unsigned AMDGPUTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
|
||||
unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
|
||||
unsigned ChainSizeInBytes,
|
||||
VectorType *VecTy) const {
|
||||
unsigned VecRegBitWidth = VF * StoreSize;
|
||||
|
@ -257,13 +252,11 @@ unsigned AMDGPUTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
|
|||
return VF;
|
||||
}
|
||||
|
||||
unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
|
||||
unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
|
||||
AMDGPUAS AS = ST->getAMDGPUAS();
|
||||
if (AddrSpace == AS.GLOBAL_ADDRESS ||
|
||||
AddrSpace == AS.CONSTANT_ADDRESS ||
|
||||
AddrSpace == AS.CONSTANT_ADDRESS_32BIT) {
|
||||
if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
|
||||
return 128;
|
||||
return 512;
|
||||
}
|
||||
|
||||
|
@ -275,16 +268,10 @@ unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
|
|||
if (AddrSpace == AS.PRIVATE_ADDRESS)
|
||||
return 8 * ST->getMaxPrivateElementSize();
|
||||
|
||||
if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
|
||||
(AddrSpace == AS.PARAM_D_ADDRESS ||
|
||||
AddrSpace == AS.PARAM_I_ADDRESS ||
|
||||
(AddrSpace >= AS.CONSTANT_BUFFER_0 &&
|
||||
AddrSpace <= AS.CONSTANT_BUFFER_15)))
|
||||
return 128;
|
||||
llvm_unreachable("unhandled address space");
|
||||
}
|
||||
|
||||
bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
|
||||
bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
|
||||
unsigned Alignment,
|
||||
unsigned AddrSpace) const {
|
||||
// We allow vectorization of flat stores, even though we may need to decompose
|
||||
|
@ -297,19 +284,19 @@ bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
|
||||
bool GCNTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
|
||||
unsigned Alignment,
|
||||
unsigned AddrSpace) const {
|
||||
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
|
||||
}
|
||||
|
||||
bool AMDGPUTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
|
||||
bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
|
||||
unsigned Alignment,
|
||||
unsigned AddrSpace) const {
|
||||
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
|
||||
}
|
||||
|
||||
unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
|
||||
unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) {
|
||||
// Disable unrolling if the loop is not vectorized.
|
||||
// TODO: Enable this again.
|
||||
if (VF == 1)
|
||||
|
@ -318,7 +305,7 @@ unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
|
|||
return 8;
|
||||
}
|
||||
|
||||
bool AMDGPUTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
|
||||
bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
|
||||
MemIntrinsicInfo &Info) const {
|
||||
switch (Inst->getIntrinsicID()) {
|
||||
case Intrinsic::amdgcn_atomic_inc:
|
||||
|
@ -347,7 +334,7 @@ bool AMDGPUTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
|
|||
}
|
||||
}
|
||||
|
||||
int AMDGPUTTIImpl::getArithmeticInstrCost(
|
||||
int GCNTTIImpl::getArithmeticInstrCost(
|
||||
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
|
||||
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
|
||||
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args ) {
|
||||
|
@ -457,7 +444,7 @@ int AMDGPUTTIImpl::getArithmeticInstrCost(
|
|||
Opd1PropInfo, Opd2PropInfo);
|
||||
}
|
||||
|
||||
unsigned AMDGPUTTIImpl::getCFInstrCost(unsigned Opcode) {
|
||||
unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
|
||||
// XXX - For some reason this isn't called for switch.
|
||||
switch (Opcode) {
|
||||
case Instruction::Br:
|
||||
|
@ -468,7 +455,7 @@ unsigned AMDGPUTTIImpl::getCFInstrCost(unsigned Opcode) {
|
|||
}
|
||||
}
|
||||
|
||||
int AMDGPUTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
|
||||
int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
|
||||
bool IsPairwise) {
|
||||
EVT OrigTy = TLI->getValueType(DL, Ty);
|
||||
|
||||
|
@ -483,7 +470,7 @@ int AMDGPUTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
|
|||
return LT.first * getFullRateInstrCost();
|
||||
}
|
||||
|
||||
int AMDGPUTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
|
||||
int GCNTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
|
||||
bool IsPairwise,
|
||||
bool IsUnsigned) {
|
||||
EVT OrigTy = TLI->getValueType(DL, Ty);
|
||||
|
@ -499,7 +486,7 @@ int AMDGPUTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
|
|||
return LT.first * getHalfRateInstrCost();
|
||||
}
|
||||
|
||||
int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
|
||||
int GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
|
||||
unsigned Index) {
|
||||
switch (Opcode) {
|
||||
case Instruction::ExtractElement:
|
||||
|
@ -554,7 +541,7 @@ static bool isArgPassedInSGPR(const Argument *A) {
|
|||
|
||||
/// \returns true if the result of the value could potentially be
|
||||
/// different across workitems in a wavefront.
|
||||
bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
|
||||
bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
|
||||
if (const Argument *A = dyn_cast<Argument>(V))
|
||||
return !isArgPassedInSGPR(A);
|
||||
|
||||
|
@ -584,7 +571,7 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUTTIImpl::isAlwaysUniform(const Value *V) const {
|
||||
bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
|
||||
if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
|
||||
switch (Intrinsic->getIntrinsicID()) {
|
||||
default:
|
||||
|
@ -597,7 +584,7 @@ bool AMDGPUTTIImpl::isAlwaysUniform(const Value *V) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
unsigned AMDGPUTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||
unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||
Type *SubTp) {
|
||||
if (ST->hasVOP3PInsts()) {
|
||||
VectorType *VT = cast<VectorType>(Tp);
|
||||
|
@ -620,7 +607,7 @@ unsigned AMDGPUTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Inde
|
|||
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
|
||||
}
|
||||
|
||||
bool AMDGPUTTIImpl::areInlineCompatible(const Function *Caller,
|
||||
bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
|
||||
const Function *Callee) const {
|
||||
const TargetMachine &TM = getTLI()->getTargetMachine();
|
||||
const FeatureBitset &CallerBits =
|
||||
|
@ -632,3 +619,114 @@ bool AMDGPUTTIImpl::areInlineCompatible(const Function *Caller,
|
|||
FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
|
||||
return ((RealCallerBits & RealCalleeBits) == RealCalleeBits);
|
||||
}
|
||||
|
||||
/// Fills in the unrolling preferences \p UP for loop \p L by delegating to the
/// shared CommonTTI implementation; nothing GCN-specific is added here.
void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP) {
  CommonTTI.getUnrollingPreferences(L, SE, UP);
}
|
||||
|
||||
/// Returns the hardware register count reported for R600 (512). The \p Vec
/// argument is ignored.
unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
  // XXX - 4 channels. Should these count as vector instead?
  const unsigned NumChannels = 4;
  return NumChannels * 128;
}
|
||||
|
||||
unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
|
||||
return getHardwareNumberOfRegisters(Vec);
|
||||
}
|
||||
|
||||
/// Returns the register width in bits. The answer is 32 regardless of
/// \p Vector.
unsigned R600TTIImpl::getRegisterBitWidth(bool Vector) const {
  const unsigned WidthInBits = 32;
  return WidthInBits;
}
|
||||
|
||||
/// Returns the minimum vector register width in bits, which is fixed at 32.
unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
  const unsigned MinWidthInBits = 32;
  return MinWidthInBits;
}
|
||||
|
||||
unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
|
||||
AMDGPUAS AS = ST->getAMDGPUAS();
|
||||
if (AddrSpace == AS.GLOBAL_ADDRESS ||
|
||||
AddrSpace == AS.CONSTANT_ADDRESS)
|
||||
return 128;
|
||||
if (AddrSpace == AS.LOCAL_ADDRESS ||
|
||||
AddrSpace == AS.REGION_ADDRESS)
|
||||
return 64;
|
||||
if (AddrSpace == AS.PRIVATE_ADDRESS)
|
||||
return 32;
|
||||
|
||||
if ((AddrSpace == AS.PARAM_D_ADDRESS ||
|
||||
AddrSpace == AS.PARAM_I_ADDRESS ||
|
||||
(AddrSpace >= AS.CONSTANT_BUFFER_0 &&
|
||||
AddrSpace <= AS.CONSTANT_BUFFER_15)))
|
||||
return 128;
|
||||
llvm_unreachable("unhandled address space");
|
||||
}
|
||||
|
||||
bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
|
||||
unsigned Alignment,
|
||||
unsigned AddrSpace) const {
|
||||
// We allow vectorization of flat stores, even though we may need to decompose
|
||||
// them later if they may access private memory. We don't have enough context
|
||||
// here, and legalization can handle it.
|
||||
if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
|
||||
unsigned Alignment,
|
||||
unsigned AddrSpace) const {
|
||||
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
|
||||
}
|
||||
|
||||
bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
|
||||
unsigned Alignment,
|
||||
unsigned AddrSpace) const {
|
||||
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
|
||||
}
|
||||
|
||||
/// Returns the maximum interleave factor for vectorization factor \p VF:
/// 1 when VF is 1 (scalar loop), otherwise 8.
unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
  // Disable unrolling if the loop is not vectorized.
  // TODO: Enable this again.
  return VF == 1 ? 1 : 8;
}
|
||||
|
||||
unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode) {
|
||||
// XXX - For some reason this isn't called for switch.
|
||||
switch (Opcode) {
|
||||
case Instruction::Br:
|
||||
case Instruction::Ret:
|
||||
return 10;
|
||||
default:
|
||||
return BaseT::getCFInstrCost(Opcode);
|
||||
}
|
||||
}
|
||||
|
||||
int R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
|
||||
unsigned Index) {
|
||||
switch (Opcode) {
|
||||
case Instruction::ExtractElement:
|
||||
case Instruction::InsertElement: {
|
||||
unsigned EltSize
|
||||
= DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
|
||||
if (EltSize < 32) {
|
||||
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
|
||||
}
|
||||
|
||||
// Extracts are just reads of a subregister, so are free. Inserts are
|
||||
// considered free because we don't want to have any cost for scalarizing
|
||||
// operations, and we don't have to copy into a different register class.
|
||||
|
||||
// Dynamic indexing isn't free and is best avoided.
|
||||
return Index == ~0u ? 2 : 0;
|
||||
}
|
||||
default:
|
||||
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
|
||||
}
|
||||
}
|
||||
|
||||
/// Fills in the unrolling preferences \p UP for loop \p L by delegating to the
/// shared CommonTTI implementation; nothing R600-specific is added here.
void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP) {
  CommonTTI.getUnrollingPreferences(L, SE, UP);
}
|
||||
|
|
|
@ -47,6 +47,29 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
|
|||
|
||||
const AMDGPUSubtarget *ST;
|
||||
const AMDGPUTargetLowering *TLI;
|
||||
|
||||
public:
|
||||
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
|
||||
: BaseT(TM, F.getParent()->getDataLayout()),
|
||||
ST(TM->getSubtargetImpl(F)),
|
||||
TLI(ST->getTargetLowering()) {}
|
||||
|
||||
const AMDGPUSubtarget *getST() const { return ST; }
|
||||
const AMDGPUTargetLowering *getTLI() const { return TLI; }
|
||||
|
||||
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::UnrollingPreferences &UP);
|
||||
};
|
||||
|
||||
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
|
||||
using BaseT = BasicTTIImplBase<GCNTTIImpl>;
|
||||
using TTI = TargetTransformInfo;
|
||||
|
||||
friend BaseT;
|
||||
|
||||
const AMDGPUSubtarget *ST;
|
||||
const AMDGPUTargetLowering *TLI;
|
||||
AMDGPUTTIImpl CommonTTI;
|
||||
bool IsGraphicsShader;
|
||||
|
||||
const FeatureBitset InlineFeatureIgnoreList = {
|
||||
|
@ -99,10 +122,11 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
|
|||
}
|
||||
|
||||
public:
|
||||
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
|
||||
explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
|
||||
: BaseT(TM, F.getParent()->getDataLayout()),
|
||||
ST(TM->getSubtargetImpl(F)),
|
||||
TLI(ST->getTargetLowering()),
|
||||
CommonTTI(TM, F),
|
||||
IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
|
||||
|
||||
bool hasBranchDivergence() { return true; }
|
||||
|
@ -182,6 +206,46 @@ public:
|
|||
bool IsUnsigned);
|
||||
};
|
||||
|
||||
/// TargetTransformInfo implementation for R600. It holds the subtarget and
/// target lowering for the function it was created for, plus an AMDGPUTTIImpl
/// (CommonTTI) that getUnrollingPreferences delegates to.
class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  // Keep this declaration order: the constructor initializes TLI from ST.
  const AMDGPUSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  /// Builds the implementation for function \p F using target machine \p TM.
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()),
        ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()),
        CommonTTI(TM, F) {}

  // Accessors.
  const AMDGPUSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  // Loop unrolling.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  // Register counts and widths.
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  // Legality of vectorizing memory access chains.
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  // Interleaving and instruction costs.
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
|
||||
|
|
Loading…
Reference in New Issue