forked from OSchip/llvm-project
[TTI] Add VecPred argument to getCmpSelInstrCost.
On some targets, like AArch64, vector selects can be efficiently lowered if the vector condition is a compare with a supported predicate. This patch adds a new argument to getCmpSelInstrCost, to indicate the predicate of the feeding select condition. Note that it is not sufficient to use the context instruction when querying the cost of a vector select starting from a scalar one, because the condition of the vector select could be composed of compares with different predicates. This change greatly improves modeling the costs of certain compare/select patterns on AArch64. I am also planning on putting up patches to make use of the new argument in SLPVectorizer & LV. Reviewed By: dmgreen, RKSimon Differential Revision: https://reviews.llvm.org/D90070
This commit is contained in:
parent
ec809e4cfe
commit
73f01e3df5
|
@ -21,6 +21,7 @@
|
|||
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
|
||||
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
|
||||
|
||||
#include "llvm/IR/InstrTypes.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/Pass.h"
|
||||
|
@ -1092,10 +1093,14 @@ public:
|
|||
|
||||
/// \returns The expected cost of compare and select instructions. If there
|
||||
/// is an existing instruction that holds Opcode, it may be passed in the
|
||||
/// 'I' parameter.
|
||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
|
||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
|
||||
const Instruction *I = nullptr) const;
|
||||
/// 'I' parameter. The \p VecPred parameter can be used to indicate the select
|
||||
/// is using a compare with the specified predicate as condition. When vector
|
||||
/// types are passed, \p VecPred must be used for all lanes.
|
||||
int getCmpSelInstrCost(
|
||||
unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
|
||||
CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
|
||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
|
||||
const Instruction *I = nullptr) const;
|
||||
|
||||
/// \return The expected cost of vector Insert and Extract.
|
||||
/// Use -1 to indicate that there is no information on the index value.
|
||||
|
@ -1534,6 +1539,7 @@ public:
|
|||
virtual int getCFInstrCost(unsigned Opcode,
|
||||
TTI::TargetCostKind CostKind) = 0;
|
||||
virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I) = 0;
|
||||
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||
|
@ -1975,9 +1981,10 @@ public:
|
|||
return Impl.getCFInstrCost(Opcode, CostKind);
|
||||
}
|
||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I) override {
|
||||
return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
|
||||
return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
|
||||
}
|
||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
|
||||
return Impl.getVectorInstrCost(Opcode, Val, Index);
|
||||
|
|
|
@ -478,6 +478,7 @@ public:
|
|||
}
|
||||
|
||||
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I) const {
|
||||
return 1;
|
||||
|
@ -947,12 +948,14 @@ public:
|
|||
case Instruction::Select: {
|
||||
Type *CondTy = U->getOperand(0)->getType();
|
||||
return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE,
|
||||
CostKind, I);
|
||||
}
|
||||
case Instruction::ICmp:
|
||||
case Instruction::FCmp: {
|
||||
Type *ValTy = U->getOperand(0)->getType();
|
||||
return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
|
||||
cast<CmpInst>(U)->getPredicate(),
|
||||
CostKind, I);
|
||||
}
|
||||
case Instruction::InsertElement: {
|
||||
|
|
|
@ -881,6 +881,7 @@ public:
|
|||
}
|
||||
|
||||
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I = nullptr) {
|
||||
const TargetLoweringBase *TLI = getTLI();
|
||||
|
@ -889,7 +890,8 @@ public:
|
|||
|
||||
// TODO: Handle other cost kinds.
|
||||
if (CostKind != TTI::TCK_RecipThroughput)
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
|
||||
I);
|
||||
|
||||
// Selects on vectors are actually vector selects.
|
||||
if (ISD == ISD::SELECT) {
|
||||
|
@ -914,7 +916,7 @@ public:
|
|||
if (CondTy)
|
||||
CondTy = CondTy->getScalarType();
|
||||
unsigned Cost = thisT()->getCmpSelInstrCost(
|
||||
Opcode, ValVTy->getScalarType(), CondTy, CostKind, I);
|
||||
Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);
|
||||
|
||||
// Return the cost of multiple scalar invocation plus the cost of
|
||||
// inserting and extracting the values.
|
||||
|
@ -1241,10 +1243,12 @@ public:
|
|||
// For non-rotates (X != Y) we must add shift-by-zero handling costs.
|
||||
if (X != Y) {
|
||||
Type *CondTy = RetTy->getWithNewBitWidth(1);
|
||||
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
|
||||
CostKind);
|
||||
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
|
||||
CondTy, CostKind);
|
||||
Cost +=
|
||||
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
Cost +=
|
||||
thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
}
|
||||
return Cost;
|
||||
}
|
||||
|
@ -1483,10 +1487,12 @@ public:
|
|||
Type *CondTy = RetTy->getWithNewBitWidth(1);
|
||||
unsigned Cost = 0;
|
||||
// TODO: Ideally getCmpSelInstrCost would accept an icmp condition code.
|
||||
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
|
||||
CostKind);
|
||||
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
|
||||
CostKind);
|
||||
Cost +=
|
||||
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
Cost +=
|
||||
thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
// TODO: Should we add an OperandValueProperties::OP_Zero property?
|
||||
if (IID == Intrinsic::abs)
|
||||
Cost += thisT()->getArithmeticInstrCost(
|
||||
|
@ -1508,10 +1514,12 @@ public:
|
|||
IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
|
||||
ScalarizationCostPassed);
|
||||
Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
|
||||
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
|
||||
CostKind);
|
||||
Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
|
||||
CondTy, CostKind);
|
||||
Cost +=
|
||||
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
Cost += 2 * thisT()->getCmpSelInstrCost(
|
||||
BinaryOperator::Select, RetTy, CondTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
return Cost;
|
||||
}
|
||||
case Intrinsic::uadd_sat:
|
||||
|
@ -1527,8 +1535,9 @@ public:
|
|||
IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
|
||||
ScalarizationCostPassed);
|
||||
Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
|
||||
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
|
||||
CostKind);
|
||||
Cost +=
|
||||
thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
return Cost;
|
||||
}
|
||||
case Intrinsic::smul_fix:
|
||||
|
@ -1573,10 +1582,12 @@ public:
|
|||
// Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
|
||||
unsigned Cost = 0;
|
||||
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
|
||||
Cost += 3 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
|
||||
OverflowTy, CostKind);
|
||||
Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, OverflowTy,
|
||||
OverflowTy, CostKind);
|
||||
Cost += 3 * thisT()->getCmpSelInstrCost(
|
||||
BinaryOperator::ICmp, SumTy, OverflowTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
Cost += 2 * thisT()->getCmpSelInstrCost(
|
||||
BinaryOperator::ICmp, OverflowTy, OverflowTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
|
||||
CostKind);
|
||||
return Cost;
|
||||
|
@ -1591,8 +1602,9 @@ public:
|
|||
|
||||
unsigned Cost = 0;
|
||||
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
|
||||
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
|
||||
OverflowTy, CostKind);
|
||||
Cost +=
|
||||
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
return Cost;
|
||||
}
|
||||
case Intrinsic::smul_with_overflow:
|
||||
|
@ -1621,8 +1633,9 @@ public:
|
|||
CostKind, TTI::OK_AnyValue,
|
||||
TTI::OK_UniformConstantValue);
|
||||
|
||||
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
|
||||
OverflowTy, CostKind);
|
||||
Cost +=
|
||||
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
return Cost;
|
||||
}
|
||||
case Intrinsic::ctpop:
|
||||
|
@ -1864,9 +1877,10 @@ public:
|
|||
(IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
|
||||
Ty, NumVecElts, SubTy);
|
||||
MinMaxCost +=
|
||||
thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) +
|
||||
thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind) +
|
||||
thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
|
||||
CostKind);
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
Ty = SubTy;
|
||||
++LongVectorCount;
|
||||
}
|
||||
|
@ -1888,9 +1902,10 @@ public:
|
|||
thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty);
|
||||
MinMaxCost +=
|
||||
NumReduxLevels *
|
||||
(thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
|
||||
(thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind) +
|
||||
thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
|
||||
CostKind));
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind));
|
||||
// The last min/max should be in vector registers and we counted it above.
|
||||
// So just need a single extractelement.
|
||||
return ShuffleCost + MinMaxCost +
|
||||
|
|
|
@ -807,11 +807,13 @@ int TargetTransformInfo::getCFInstrCost(unsigned Opcode,
|
|||
|
||||
int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I) const {
|
||||
assert((I == nullptr || I->getOpcode() == Opcode) &&
|
||||
"Opcode should reflect passed instruction.");
|
||||
int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
|
||||
int Cost =
|
||||
TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
|
||||
assert(Cost >= 0 && "TTI should not produce negative costs!");
|
||||
return Cost;
|
||||
}
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AArch64ExpandImm.h"
|
||||
#include "AArch64TargetTransformInfo.h"
|
||||
#include "AArch64ExpandImm.h"
|
||||
#include "MCTargetDesc/AArch64AddressingModes.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
|
@ -16,9 +16,11 @@
|
|||
#include "llvm/CodeGen/TargetLowering.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/IntrinsicsAArch64.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include <algorithm>
|
||||
using namespace llvm;
|
||||
using namespace llvm::PatternMatch;
|
||||
|
||||
#define DEBUG_TYPE "aarch64tti"
|
||||
|
||||
|
@ -675,12 +677,13 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
|
|||
}
|
||||
|
||||
int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
Type *CondTy,
|
||||
Type *CondTy, CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I) {
|
||||
// TODO: Handle other cost kinds.
|
||||
if (CostKind != TTI::TCK_RecipThroughput)
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
|
||||
I);
|
||||
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
// We don't lower some vector selects well that are wider than the register
|
||||
|
@ -688,6 +691,26 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
|||
if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
|
||||
// We would need this many instructions to hide the scalarization happening.
|
||||
const int AmortizationCost = 20;
|
||||
|
||||
// If VecPred is not set, check if we can get a predicate from the context
|
||||
// instruction, if its type matches the requested ValTy.
|
||||
if (VecPred == CmpInst::BAD_ICMP_PREDICATE && I && I->getType() == ValTy) {
|
||||
CmpInst::Predicate CurrentPred;
|
||||
if (match(I, m_Select(m_Cmp(CurrentPred, m_Value(), m_Value()), m_Value(),
|
||||
m_Value())))
|
||||
VecPred = CurrentPred;
|
||||
}
|
||||
// Check if we have a compare/select chain that can be lowered using CMxx &
|
||||
// BFI pair.
|
||||
if (CmpInst::isIntPredicate(VecPred)) {
|
||||
static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
|
||||
MVT::v8i16, MVT::v2i32, MVT::v4i32,
|
||||
MVT::v2i64};
|
||||
auto LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
|
||||
return LT.first;
|
||||
}
|
||||
|
||||
static const TypeConversionCostTblEntry
|
||||
VectorSelectTbl[] = {
|
||||
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
|
||||
|
@ -707,7 +730,7 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
|||
return Entry->Cost;
|
||||
}
|
||||
}
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
|
||||
}
|
||||
|
||||
AArch64TTIImpl::TTI::MemCmpExpansionOptions
|
||||
|
|
|
@ -141,6 +141,7 @@ public:
|
|||
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
|
||||
|
||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I = nullptr);
|
||||
|
||||
|
|
|
@ -810,6 +810,7 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
|
|||
}
|
||||
|
||||
int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I) {
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
|
@ -839,7 +840,8 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
|||
}
|
||||
|
||||
if (CostKind != TTI::TCK_RecipThroughput)
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
|
||||
I);
|
||||
|
||||
// On NEON a vector select gets lowered to vbsl.
|
||||
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
|
||||
|
@ -866,8 +868,8 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
|||
int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy()
|
||||
? ST->getMVEVectorCostFactor()
|
||||
: 1;
|
||||
return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind,
|
||||
I);
|
||||
return BaseCost *
|
||||
BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
|
||||
}
|
||||
|
||||
int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
|
||||
|
|
|
@ -213,6 +213,7 @@ public:
|
|||
const Instruction *I = nullptr);
|
||||
|
||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I = nullptr);
|
||||
|
||||
|
|
|
@ -242,13 +242,16 @@ unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(
|
|||
}
|
||||
|
||||
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
Type *CondTy, TTI::TargetCostKind CostKind, const Instruction *I) {
|
||||
Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I) {
|
||||
if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
|
||||
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
|
||||
if (Opcode == Instruction::FCmp)
|
||||
return LT.first + FloatFactor * getTypeNumElements(ValTy);
|
||||
}
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
|
||||
}
|
||||
|
||||
unsigned HexagonTTIImpl::getArithmeticInstrCost(
|
||||
|
|
|
@ -134,6 +134,8 @@ public:
|
|||
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
|
||||
bool UseMaskForCond = false, bool UseMaskForGaps = false);
|
||||
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I = nullptr);
|
||||
unsigned getArithmeticInstrCost(
|
||||
|
|
|
@ -978,9 +978,11 @@ int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|||
}
|
||||
|
||||
int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I) {
|
||||
int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
|
||||
int Cost =
|
||||
BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
|
||||
// TODO: Handle other cost kinds.
|
||||
if (CostKind != TTI::TCK_RecipThroughput)
|
||||
return Cost;
|
||||
|
|
|
@ -113,6 +113,7 @@ public:
|
|||
const Instruction *I = nullptr);
|
||||
int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
|
||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I = nullptr);
|
||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
||||
|
|
|
@ -847,11 +847,11 @@ static unsigned getOperandsExtensionCost(const Instruction *I) {
|
|||
}
|
||||
|
||||
int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
Type *CondTy,
|
||||
Type *CondTy, CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I) {
|
||||
if (CostKind != TTI::TCK_RecipThroughput)
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind);
|
||||
|
||||
if (!ValTy->isVectorTy()) {
|
||||
switch (Opcode) {
|
||||
|
@ -927,7 +927,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
|||
}
|
||||
}
|
||||
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind);
|
||||
}
|
||||
|
||||
int SystemZTTIImpl::
|
||||
|
|
|
@ -97,6 +97,7 @@ public:
|
|||
TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
|
||||
const Instruction *I = nullptr);
|
||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I = nullptr);
|
||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
||||
|
|
|
@ -2084,11 +2084,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|||
}
|
||||
|
||||
int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I) {
|
||||
// TODO: Handle other cost kinds.
|
||||
if (CostKind != TTI::TCK_RecipThroughput)
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
|
||||
I);
|
||||
|
||||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
|
@ -2272,7 +2274,7 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
|||
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
|
||||
return LT.first * (ExtraCost + Entry->Cost);
|
||||
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
|
||||
}
|
||||
|
||||
unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
|
||||
|
@ -3223,7 +3225,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
|
|||
getScalarizationOverhead(MaskTy, DemandedElts, false, true);
|
||||
int ScalarCompareCost = getCmpSelInstrCost(
|
||||
Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr,
|
||||
CostKind);
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
int BranchCost = getCFInstrCost(Instruction::Br, CostKind);
|
||||
int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
|
||||
int ValueSplitCost =
|
||||
|
@ -3644,8 +3646,10 @@ int X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned) {
|
|||
|
||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
|
||||
// Otherwise fall back to cmp+select.
|
||||
return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
|
||||
getCmpSelInstrCost(Instruction::Select, Ty, CondTy, CostKind);
|
||||
return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CmpInst::BAD_ICMP_PREDICATE,
|
||||
CostKind) +
|
||||
getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
}
|
||||
|
||||
int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
|
||||
|
@ -4123,9 +4127,9 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
|
|||
FixedVectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF);
|
||||
MaskUnpackCost =
|
||||
getScalarizationOverhead(MaskTy, DemandedElts, false, true);
|
||||
int ScalarCompareCost =
|
||||
getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()),
|
||||
nullptr, CostKind);
|
||||
int ScalarCompareCost = getCmpSelInstrCost(
|
||||
Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), nullptr,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
int BranchCost = getCFInstrCost(Instruction::Br, CostKind);
|
||||
MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
|
||||
}
|
||||
|
|
|
@ -133,6 +133,7 @@ public:
|
|||
TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
|
||||
const Instruction *I = nullptr);
|
||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const Instruction *I = nullptr);
|
||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
||||
|
|
|
@ -2236,9 +2236,9 @@ template<typename T> static int costAndCollectOperands(
|
|||
unsigned MinIdx, unsigned MaxIdx) {
|
||||
Operations.emplace_back(Opcode, MinIdx, MaxIdx);
|
||||
Type *OpType = S->getOperand(0)->getType();
|
||||
return NumRequired *
|
||||
TTI.getCmpSelInstrCost(Opcode, OpType,
|
||||
CmpInst::makeCmpResultType(OpType), CostKind);
|
||||
return NumRequired * TTI.getCmpSelInstrCost(
|
||||
Opcode, OpType, CmpInst::makeCmpResultType(OpType),
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
};
|
||||
|
||||
switch (S->getSCEVType()) {
|
||||
|
|
|
@ -2026,7 +2026,7 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
|
|||
|
||||
BudgetRemaining -=
|
||||
TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
|
||||
CostKind);
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
|
||||
// Don't convert to selects if we could remove undefined behavior instead.
|
||||
if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
|
||||
|
|
|
@ -6613,7 +6613,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
|||
TTI.getCmpSelInstrCost(
|
||||
Instruction::Select, ToVectorTy(Phi->getType(), VF),
|
||||
ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF),
|
||||
CostKind);
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
|
||||
return TTI.getCFInstrCost(Instruction::PHI, CostKind);
|
||||
}
|
||||
|
@ -6702,7 +6702,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
|||
CondTy = VectorType::get(CondTy, VF);
|
||||
}
|
||||
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy,
|
||||
CostKind, I);
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind, I);
|
||||
}
|
||||
case Instruction::ICmp:
|
||||
case Instruction::FCmp: {
|
||||
|
@ -6711,8 +6711,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
|||
if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
|
||||
ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
|
||||
VectorTy = ToVectorTy(ValTy, VF);
|
||||
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, CostKind,
|
||||
I);
|
||||
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind, I);
|
||||
}
|
||||
case Instruction::Store:
|
||||
case Instruction::Load: {
|
||||
|
|
|
@ -3539,16 +3539,17 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
case Instruction::ICmp:
|
||||
case Instruction::Select: {
|
||||
// Calculate the cost of this instruction.
|
||||
int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
|
||||
Builder.getInt1Ty(),
|
||||
CostKind, VL0);
|
||||
int ScalarEltCost =
|
||||
TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
|
||||
if (NeedToShuffleReuses) {
|
||||
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
|
||||
}
|
||||
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
|
||||
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
|
||||
CostKind, VL0);
|
||||
int VecCost =
|
||||
TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
|
||||
// Check if it is possible and profitable to use min/max for selects in
|
||||
// VL.
|
||||
//
|
||||
|
@ -3560,8 +3561,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
// If the selects are the only uses of the compares, they will be dead
|
||||
// and we can adjust the cost by removing their cost.
|
||||
if (IntrinsicAndUse.second)
|
||||
IntrinsicCost -= TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy,
|
||||
MaskTy, CostKind);
|
||||
IntrinsicCost -=
|
||||
TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, MaskTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||
VecCost = std::min(VecCost, IntrinsicCost);
|
||||
}
|
||||
return ReuseShuffleCost + VecCost - ScalarCost;
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
; COST-LABEL: v8i8_select_eq
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp eq <8 x i8> %a, %b
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i8> %a, <8 x i8> %c
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i8> %a, <8 x i8> %c
|
||||
|
||||
; CODE-LABEL: v8i8_select_eq
|
||||
; CODE: bb.0
|
||||
|
@ -19,7 +19,7 @@ define <8 x i8> @v8i8_select_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
|
|||
|
||||
; COST-LABEL: v16i8_select_sgt
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp sgt <16 x i8> %a, %b
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %s.1 = select <16 x i1> %cmp.1, <16 x i8> %a, <16 x i8> %c
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <16 x i1> %cmp.1, <16 x i8> %a, <16 x i8> %c
|
||||
|
||||
; CODE-LABEL: v16i8_select_sgt
|
||||
; CODE: bb.0
|
||||
|
@ -35,7 +35,7 @@ define <16 x i8> @v16i8_select_sgt(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
|
|||
|
||||
; COST-LABEL: v4i16_select_ne
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ne <4 x i16> %a, %b
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i16> %a, <4 x i16> %c
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i16> %a, <4 x i16> %c
|
||||
|
||||
; CODE-LABEL: v4i16_select_ne
|
||||
; CODE: bb.0
|
||||
|
@ -51,7 +51,7 @@ define <4 x i16> @v4i16_select_ne(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
|
|||
|
||||
; COST-LABEL: v8i16_select_ugt
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ugt <8 x i16> %a, %b
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i16> %a, <8 x i16> %c
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i16> %a, <8 x i16> %c
|
||||
|
||||
; CODE-LABEL: v8i16_select_ugt
|
||||
; CODE: bb.0
|
||||
|
@ -67,7 +67,7 @@ define <8 x i16> @v8i16_select_ugt(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
|
|||
|
||||
; COST-LABEL: v2i32_select_ule
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ule <2 x i32> %a, %b
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i32> %a, <2 x i32> %c
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i32> %a, <2 x i32> %c
|
||||
|
||||
; CODE-LABEL: v2i32_select_ule
|
||||
; CODE: bb.0
|
||||
|
@ -83,7 +83,7 @@ define <2 x i32> @v2i32_select_ule(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
|
|||
|
||||
; COST-LABEL: v4i32_select_ult
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ult <4 x i32> %a, %b
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i32> %a, <4 x i32> %c
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i32> %a, <4 x i32> %c
|
||||
|
||||
; CODE-LABEL: v4i32_select_ult
|
||||
; CODE: bb.0
|
||||
|
@ -99,7 +99,7 @@ define <4 x i32> @v4i32_select_ult(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
|||
|
||||
; COST-LABEL: v2i64_select_sle
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp sle <2 x i64> %a, %b
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i64> %a, <2 x i64> %c
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i64> %a, <2 x i64> %c
|
||||
|
||||
; CODE-LABEL: v2i64_select_sle
|
||||
; CODE: bb.0
|
||||
|
@ -115,7 +115,7 @@ define <2 x i64> @v2i64_select_sle(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
|
|||
|
||||
; COST-LABEL: v3i64_select_sle
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cmp.1 = icmp sle <3 x i64> %a, %b
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %s.1 = select <3 x i1> %cmp.1, <3 x i64> %a, <3 x i64> %c
|
||||
; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <3 x i1> %cmp.1, <3 x i64> %a, <3 x i64> %c
|
||||
|
||||
; CODE-LABEL: v3i64_select_sle
|
||||
; CODE: bb.0
|
||||
|
|
Loading…
Reference in New Issue