[AMDGPU][CostModel] Refine cost model for control-flow instructions.
Added cost estimation for the switch instruction, updated the costs of branches, and fixed the phi cost. The `-amdgpu-unroll-threshold-if` default value had to be increased because the conditional branch cost (size) was corrected to a higher value. The test was renamed to "control-flow.ll". Removed redundant code in `X86TTIImpl::getCFInstrCost()` and `PPCTTIImpl::getCFInstrCost()`.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D96805
commit 8f4b7e94a2
parent 4f173c0c42
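The change threads a new `const Instruction *I` parameter through `getCFInstrCost()` so targets can inspect the actual terminator (for example, count the cases of a `switch` or detect an unconditional `br`). A minimal caller sketch against the updated interface follows; the helper name and the choice of `TCK_CodeSize` are illustrative assumptions, not code from this patch.

    // Illustrative sketch (assumed usage, not part of the patch): query the cost
    // of a control-flow instruction, passing the instruction itself so targets
    // such as AMDGPU can refine the estimate.
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    static int terminatorSizeCost(const TargetTransformInfo &TTI,
                                  const Instruction &Term) {
      return TTI.getCFInstrCost(Term.getOpcode(),
                                TargetTransformInfo::TCK_CodeSize, &Term);
    }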
@@ -1103,9 +1103,10 @@ public:
                                  unsigned Index = -1) const;

   /// \return The expected cost of control-flow related instructions such as
-  /// Phi, Ret, Br.
+  /// Phi, Ret, Br, Switch.
   int getCFInstrCost(unsigned Opcode,
-                     TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
+                     TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+                     const Instruction *I = nullptr) const;

   /// \returns The expected cost of compare and select instructions. If there
   /// is an existing instruction that holds Opcode, it may be passed in the
@@ -1573,8 +1574,8 @@ public:
                               const Instruction *I) = 0;
   virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                        VectorType *VecTy, unsigned Index) = 0;
-  virtual int getCFInstrCost(unsigned Opcode,
-                             TTI::TargetCostKind CostKind) = 0;
+  virtual int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                             const Instruction *I = nullptr) = 0;
   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                  CmpInst::Predicate VecPred,
                                  TTI::TargetCostKind CostKind,
@@ -2040,8 +2041,9 @@ public:
                                unsigned Index) override {
     return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
   }
-  int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override {
-    return Impl.getCFInstrCost(Opcode, CostKind);
+  int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                     const Instruction *I = nullptr) override {
+    return Impl.getCFInstrCost(Opcode, CostKind, I);
   }
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                          CmpInst::Predicate VecPred,
@@ -512,7 +512,8 @@ public:
     return 1;
   }

-  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const {
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                          const Instruction *I = nullptr) const {
     // A phi would be free, unless we're costing the throughput because it
     // will require a register.
     if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
@@ -933,7 +934,8 @@ public:
     case Instruction::Br:
     case Instruction::Ret:
     case Instruction::PHI:
-      return TargetTTI->getCFInstrCost(Opcode, CostKind);
+    case Instruction::Switch:
+      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
     case Instruction::ExtractValue:
     case Instruction::Freeze:
       return TTI::TCC_Free;
@@ -897,8 +897,9 @@ public:
                                     TTI::CastContextHint::None, TTI::TCK_RecipThroughput);
   }

-  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
-    return BaseT::getCFInstrCost(Opcode, CostKind);
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                          const Instruction *I = nullptr) {
+    return BaseT::getCFInstrCost(Opcode, CostKind, I);
   }

   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
@@ -783,8 +783,11 @@ int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
 }

 int TargetTransformInfo::getCFInstrCost(unsigned Opcode,
-                                        TTI::TargetCostKind CostKind) const {
-  int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind);
+                                        TTI::TargetCostKind CostKind,
+                                        const Instruction *I) const {
+  assert((I == nullptr || I->getOpcode() == Opcode) &&
+         "Opcode should reflect passed instruction.");
+  int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -1374,6 +1377,7 @@ TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
   case Instruction::ExtractValue:
   case Instruction::ShuffleVector:
   case Instruction::Call:
+  case Instruction::Switch:
     return getUserCost(I, CostKind);
   default:
     // We don't have any information on this instruction.
@@ -653,7 +653,8 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
 }

 unsigned AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
-                                        TTI::TargetCostKind CostKind) {
+                                        TTI::TargetCostKind CostKind,
+                                        const Instruction *I) {
   if (CostKind != TTI::TCK_RecipThroughput)
     return Opcode == Instruction::PHI ? 0 : 1;
   assert(CostKind == TTI::TCK_RecipThroughput && "unexpected CostKind");
@@ -139,7 +139,8 @@ public:
   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                                unsigned Index);

-  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                          const Instruction *I = nullptr);

   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
@@ -39,7 +39,7 @@ static cl::opt<unsigned> UnrollThresholdLocal(
 static cl::opt<unsigned> UnrollThresholdIf(
   "amdgpu-unroll-threshold-if",
   cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
-  cl::init(150), cl::Hidden);
+  cl::init(200), cl::Hidden);

 static cl::opt<bool> UnrollRuntimeLocal(
   "amdgpu-unroll-runtime-local",
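Because `amdgpu-unroll-threshold-if` is a `cl::opt`, the previous default should still be reachable per run, e.g. by passing `-amdgpu-unroll-threshold-if=150` to `opt` (or `-mllvm -amdgpu-unroll-threshold-if=150` through clang); the exact invocation is an assumption, not something this patch documents.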
@@ -106,6 +106,10 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   UP.MaxCount = std::numeric_limits<unsigned>::max();
   UP.Partial = true;

+  // Conditional branch in a loop back edge needs 3 additional exec
+  // manipulations in average.
+  UP.BEInsns += 3;
+
   // TODO: Do we want runtime unrolling?

   // Maximum alloca size than can fit registers. Reserve 16 registers.
@@ -809,18 +813,37 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
 }

 unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode,
-                                    TTI::TargetCostKind CostKind) {
-  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
-    return Opcode == Instruction::PHI ? 0 : 1;
-
-  // XXX - For some reason this isn't called for switch.
+                                    TTI::TargetCostKind CostKind,
+                                    const Instruction *I) {
+  assert((I == nullptr || I->getOpcode() == Opcode) &&
+         "Opcode should reflect passed instruction.");
+  const bool SCost =
+      (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency);
+  const int CBrCost = SCost ? 5 : 7;
   switch (Opcode) {
-  case Instruction::Br:
-  case Instruction::Ret:
-    return 10;
-  default:
-    return BaseT::getCFInstrCost(Opcode, CostKind);
+  case Instruction::Br: {
+    // Branch instruction takes about 4 slots on gfx900.
+    auto BI = dyn_cast_or_null<BranchInst>(I);
+    if (BI && BI->isUnconditional())
+      return SCost ? 1 : 4;
+    // Suppose conditional branch takes additional 3 exec manipulations
+    // instructions in average.
+    return CBrCost;
+  }
+  case Instruction::Switch: {
+    auto SI = dyn_cast_or_null<SwitchInst>(I);
+    // Each case (including default) takes 1 cmp + 1 cbr instructions in
+    // average.
+    return (SI ? (SI->getNumCases() + 1) : 4) * (CBrCost + 1);
+  }
+  case Instruction::Ret:
+    return SCost ? 1 : 10;
+  case Instruction::PHI:
+    // TODO: 1. A prediction phi won't be eliminated?
+    //       2. Estimate data copy instructions in this case.
+    return 1;
   }
+  return BaseT::getCFInstrCost(Opcode, CostKind, I);
 }

 int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
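The numbers expected by the new control-flow.ll test fall out of the formula above. A standalone sketch of that arithmetic, assuming the hypothetical helper name below:

    // Assumed restatement of the GCN switch cost: each case, plus the default,
    // is modeled as one compare and one conditional branch.
    static int gcnSwitchCost(unsigned NumCases, bool SizeLikeCostKind) {
      const int CBrCost = SizeLikeCostKind ? 5 : 7; // conditional branch
      return (NumCases + 1) * (CBrCost + 1);        // +1 per arm for the compare
    }
    // The test's two-case switch: gcnSwitchCost(2, false) == 24 (throughput),
    // gcnSwitchCost(2, true) == 18 (code size), matching control-flow.ll.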
@@ -1292,7 +1315,8 @@ unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
 }

 unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode,
-                                     TTI::TargetCostKind CostKind) {
+                                     TTI::TargetCostKind CostKind,
+                                     const Instruction *I) {
   if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
     return Opcode == Instruction::PHI ? 0 : 1;
@@ -1302,7 +1326,7 @@ unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode,
   case Instruction::Ret:
     return 10;
   default:
-    return BaseT::getCFInstrCost(Opcode, CostKind);
+    return BaseT::getCFInstrCost(Opcode, CostKind, I);
   }
 }
@@ -163,7 +163,8 @@ public:
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
       const Instruction *CxtI = nullptr);

-  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                          const Instruction *I = nullptr);

   bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                      ArrayRef<unsigned> Indices = {}) const;
@@ -253,7 +254,8 @@ public:
   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                     unsigned AddrSpace) const;
   unsigned getMaxInterleaveFactor(unsigned VF);
-  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
 };
@@ -379,7 +379,8 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
   return getIntImmCost(Imm, Ty, CostKind);
 }

-int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                               const Instruction *I) {
   if (CostKind == TTI::TCK_RecipThroughput &&
       (ST->hasNEON() || ST->hasMVEIntegerOps())) {
     // FIXME: The vectorizer is highly sensistive to the cost of these
@@ -388,7 +389,7 @@ int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
     // vector targets.
     return 0;
   }
-  return BaseT::getCFInstrCost(Opcode, CostKind);
+  return BaseT::getCFInstrCost(Opcode, CostKind, I);
 }

 int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@@ -198,8 +198,8 @@ public:

   bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

-  int getCFInstrCost(unsigned Opcode,
-                     TTI::TargetCostKind CostKind);
+  int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                     const Instruction *I = nullptr);

   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                        TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
@@ -153,7 +153,8 @@ public:
                               const Instruction *I = nullptr);
   unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

-  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                          const Instruction *I = nullptr) {
     return 1;
   }
@@ -1000,11 +1000,12 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
                                nullptr);
 }

-int PPCTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+int PPCTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                               const Instruction *I) {
   if (CostKind != TTI::TCK_RecipThroughput)
     return Opcode == Instruction::PHI ? 0 : 1;
   // Branches are assumed to be predicted.
-  return CostKind == TTI::TCK_RecipThroughput ? 0 : 1;
+  return 0;
 }

 int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@@ -112,7 +112,8 @@ public:
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                        TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
-  int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+  int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                     const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                          CmpInst::Predicate VecPred,
                          TTI::TargetCostKind CostKind,
@@ -4076,12 +4076,13 @@ int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
   return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
 }

-unsigned
-X86TTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+unsigned X86TTIImpl::getCFInstrCost(unsigned Opcode,
+                                    TTI::TargetCostKind CostKind,
+                                    const Instruction *I) {
   if (CostKind != TTI::TCK_RecipThroughput)
     return Opcode == Instruction::PHI ? 0 : 1;
   // Branches are assumed to be predicted.
-  return CostKind == TTI::TCK_RecipThroughput ? 0 : 1;
+  return 0;
 }

 int X86TTIImpl::getGatherOverhead() const {
@@ -203,7 +203,8 @@ public:

   int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);

-  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                          const Instruction *I = nullptr);

   int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty, TTI::TargetCostKind CostKind,
@@ -1,45 +0,0 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
-
-; CHECK: 'test_br_cost'
-; CHECK: estimated cost of 10 for instruction: br i1
-; CHECK: estimated cost of 10 for instruction: br label
-; CHECK: estimated cost of 10 for instruction: ret void
-define amdgpu_kernel void @test_br_cost(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
-bb0:
-  br i1 undef, label %bb1, label %bb2
-
-bb1:
-  %vec = load i32, i32 addrspace(1)* %vaddr
-  %add = add i32 %vec, %b
-  store i32 %add, i32 addrspace(1)* %out
-  br label %bb2
-
-bb2:
-  ret void
-
-}
-
-; CHECK: 'test_switch_cost'
-; CHECK: estimated cost of -1 for instruction: switch
-define amdgpu_kernel void @test_switch_cost(i32 %a) #0 {
-entry:
-  switch i32 %a, label %default [
-    i32 0, label %case0
-    i32 1, label %case1
-  ]
-
-case0:
-  store volatile i32 undef, i32 addrspace(1)* undef
-  ret void
-
-case1:
-  store volatile i32 undef, i32 addrspace(1)* undef
-  ret void
-
-default:
-  store volatile i32 undef, i32 addrspace(1)* undef
-  ret void
-
-end:
-  ret void
-}
@@ -0,0 +1,52 @@
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck --check-prefixes=ALL,SPEED %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck --check-prefixes=ALL,SIZE %s
+
+; ALL-LABEL: 'test_br_cost'
+; SPEED: estimated cost of 7 for instruction: br i1
+; SPEED: estimated cost of 4 for instruction: br label
+; SPEED: estimated cost of 1 for instruction: %phi = phi i32 [
+; SPEED: estimated cost of 10 for instruction: ret void
+; SIZE: estimated cost of 5 for instruction: br i1
+; SIZE: estimated cost of 1 for instruction: br label
+; SIZE: estimated cost of 1 for instruction: %phi = phi i32 [
+; SIZE: estimated cost of 1 for instruction: ret void
+define amdgpu_kernel void @test_br_cost(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+bb0:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  %vec = load i32, i32 addrspace(1)* %vaddr
+  %add = add i32 %vec, %b
+  store i32 %add, i32 addrspace(1)* %out
+  br label %bb2
+
+bb2:
+  %phi = phi i32 [ %b, %bb0 ], [ %add, %bb1 ]
+  ret void
+}
+
+; ALL-LABEL: 'test_switch_cost'
+; SPEED: estimated cost of 24 for instruction: switch
+; SIZE: estimated cost of 18 for instruction: switch
+define amdgpu_kernel void @test_switch_cost(i32 %a) #0 {
+entry:
+  switch i32 %a, label %default [
+    i32 0, label %case0
+    i32 1, label %case1
+  ]
+
+case0:
+  store volatile i32 undef, i32 addrspace(1)* undef
+  ret void
+
+case1:
+  store volatile i32 undef, i32 addrspace(1)* undef
+  ret void
+
+default:
+  store volatile i32 undef, i32 addrspace(1)* undef
+  ret void
+
+end:
+  ret void
+}
@@ -81,8 +81,7 @@ entry:

 for.body:                                         ; preds = %entry, %for.inc
   %i1 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %and = and i32 %i1, 1
-  %tobool = icmp eq i32 %and, 0
+  %tobool = icmp eq i32 %i1, 0
   br i1 %tobool, label %for.inc, label %if.then

 if.then:                                          ; preds = %for.body
@@ -93,7 +92,7 @@ if.then:                                          ; preds = %for.body

 for.inc:                                          ; preds = %for.body, %if.then
   %inc = add nuw nsw i32 %i1, 1
-  %cmp = icmp ult i32 %inc, 48
+  %cmp = icmp ult i32 %inc, 38
   br i1 %cmp, label %for.body, label %for.end

 for.end:                                          ; preds = %for.cond
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -unroll-threshold=75 -unroll-peel-count=0 -unroll-allow-partial=false -unroll-max-iteration-count-to-analyze=16 < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -unroll-threshold=49 -unroll-peel-count=0 -unroll-allow-partial=false -unroll-max-iteration-count-to-analyze=16 < %s | FileCheck %s

 ; CHECK-LABEL: @test_func_addrspacecast_cost_noop(
 ; CHECK-NOT: br i1