forked from OSchip/llvm-project
[NFCI] Move cost estimation from TargetLowering to TargetTransformInfo.
TargetLowering held the last two InstructionCost-related members, `getTypeLegalizationCost()` and `getScalingFactorCost()`, while all other costs are handled in TTI. This made it inconvenient, for example, to use other TTI members when these two functions are overridden in a target. Minor refactoring: `getTypeLegalizationCost()` no longer needs a DataLayout parameter, since it was always passed from TTI. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D117723
This commit is contained in:
parent
56a34451e1
commit
7ed3d81333
|
@ -368,7 +368,9 @@ public:
|
||||||
AM.BaseOffs = BaseOffset;
|
AM.BaseOffs = BaseOffset;
|
||||||
AM.HasBaseReg = HasBaseReg;
|
AM.HasBaseReg = HasBaseReg;
|
||||||
AM.Scale = Scale;
|
AM.Scale = Scale;
|
||||||
return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
|
if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
|
||||||
|
return 0;
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isTruncateFree(Type *Ty1, Type *Ty2) {
|
bool isTruncateFree(Type *Ty1, Type *Ty2) {
|
||||||
|
@ -784,6 +786,41 @@ public:
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Estimate the cost of type-legalization and the legalized type.
|
||||||
|
std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const {
|
||||||
|
LLVMContext &C = Ty->getContext();
|
||||||
|
EVT MTy = getTLI()->getValueType(DL, Ty);
|
||||||
|
|
||||||
|
InstructionCost Cost = 1;
|
||||||
|
// We keep legalizing the type until we find a legal kind. We assume that
|
||||||
|
// the only operation that costs anything is the split. After splitting
|
||||||
|
// we need to handle two types.
|
||||||
|
while (true) {
|
||||||
|
TargetLoweringBase::LegalizeKind LK = getTLI()->getTypeConversion(C, MTy);
|
||||||
|
|
||||||
|
if (LK.first == TargetLoweringBase::TypeScalarizeScalableVector) {
|
||||||
|
// Ensure we return a sensible simple VT here, since many callers of
|
||||||
|
// this function require it.
|
||||||
|
MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64;
|
||||||
|
return std::make_pair(InstructionCost::getInvalid(), VT);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (LK.first == TargetLoweringBase::TypeLegal)
|
||||||
|
return std::make_pair(Cost, MTy.getSimpleVT());
|
||||||
|
|
||||||
|
if (LK.first == TargetLoweringBase::TypeSplitVector ||
|
||||||
|
LK.first == TargetLoweringBase::TypeExpandInteger)
|
||||||
|
Cost *= 2;
|
||||||
|
|
||||||
|
// Do not loop with f128 type.
|
||||||
|
if (MTy == LK.second)
|
||||||
|
return std::make_pair(Cost, MTy.getSimpleVT());
|
||||||
|
|
||||||
|
// Keep legalizing the type.
|
||||||
|
MTy = LK.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
|
unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
|
||||||
|
|
||||||
InstructionCost getArithmeticInstrCost(
|
InstructionCost getArithmeticInstrCost(
|
||||||
|
@ -806,7 +843,7 @@ public:
|
||||||
Opd1PropInfo, Opd2PropInfo,
|
Opd1PropInfo, Opd2PropInfo,
|
||||||
Args, CxtI);
|
Args, CxtI);
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
|
|
||||||
bool IsFloat = Ty->isFPOrFPVectorTy();
|
bool IsFloat = Ty->isFPOrFPVectorTy();
|
||||||
// Assume that floating point arithmetic operations cost twice as much as
|
// Assume that floating point arithmetic operations cost twice as much as
|
||||||
|
@ -940,10 +977,8 @@ public:
|
||||||
const TargetLoweringBase *TLI = getTLI();
|
const TargetLoweringBase *TLI = getTLI();
|
||||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||||
assert(ISD && "Invalid opcode");
|
assert(ISD && "Invalid opcode");
|
||||||
std::pair<InstructionCost, MVT> SrcLT =
|
std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
|
||||||
TLI->getTypeLegalizationCost(DL, Src);
|
std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);
|
||||||
std::pair<InstructionCost, MVT> DstLT =
|
|
||||||
TLI->getTypeLegalizationCost(DL, Dst);
|
|
||||||
|
|
||||||
TypeSize SrcSize = SrcLT.second.getSizeInBits();
|
TypeSize SrcSize = SrcLT.second.getSizeInBits();
|
||||||
TypeSize DstSize = DstLT.second.getSizeInBits();
|
TypeSize DstSize = DstLT.second.getSizeInBits();
|
||||||
|
@ -1038,7 +1073,7 @@ public:
|
||||||
// If we are legalizing by splitting, query the concrete TTI for the cost
|
// If we are legalizing by splitting, query the concrete TTI for the cost
|
||||||
// of casting the original vector twice. We also need to factor in the
|
// of casting the original vector twice. We also need to factor in the
|
||||||
// cost of the split itself. Count that as 1, to be consistent with
|
// cost of the split itself. Count that as 1, to be consistent with
|
||||||
// TLI->getTypeLegalizationCost().
|
// getTypeLegalizationCost().
|
||||||
bool SplitSrc =
|
bool SplitSrc =
|
||||||
TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
|
TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
|
||||||
TargetLowering::TypeSplitVector;
|
TargetLowering::TypeSplitVector;
|
||||||
|
@ -1119,8 +1154,7 @@ public:
|
||||||
if (CondTy->isVectorTy())
|
if (CondTy->isVectorTy())
|
||||||
ISD = ISD::VSELECT;
|
ISD = ISD::VSELECT;
|
||||||
}
|
}
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
TLI->getTypeLegalizationCost(DL, ValTy);
|
|
||||||
|
|
||||||
if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
|
if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
|
||||||
!TLI->isOperationExpand(ISD, LT.second)) {
|
!TLI->isOperationExpand(ISD, LT.second)) {
|
||||||
|
@ -1153,10 +1187,7 @@ public:
|
||||||
|
|
||||||
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
|
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||||
unsigned Index) {
|
unsigned Index) {
|
||||||
std::pair<InstructionCost, MVT> LT =
|
return getRegUsageForType(Val->getScalarType());
|
||||||
getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
|
|
||||||
|
|
||||||
return LT.first;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
|
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
|
||||||
|
@ -1205,8 +1236,7 @@ public:
|
||||||
// Assume types, such as structs, are expensive.
|
// Assume types, such as structs, are expensive.
|
||||||
if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
|
if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
|
||||||
return 4;
|
return 4;
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
|
||||||
getTLI()->getTypeLegalizationCost(DL, Src);
|
|
||||||
|
|
||||||
// Assuming that all loads of legal types cost 1.
|
// Assuming that all loads of legal types cost 1.
|
||||||
InstructionCost Cost = LT.first;
|
InstructionCost Cost = LT.first;
|
||||||
|
@ -1286,7 +1316,7 @@ public:
|
||||||
|
|
||||||
// Legalize the vector type, and get the legalized and unlegalized type
|
// Legalize the vector type, and get the legalized and unlegalized type
|
||||||
// sizes.
|
// sizes.
|
||||||
MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
|
MVT VecTyLT = getTypeLegalizationCost(VecTy).second;
|
||||||
unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
|
unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
|
||||||
unsigned VecTyLTSize = VecTyLT.getStoreSize();
|
unsigned VecTyLTSize = VecTyLT.getStoreSize();
|
||||||
|
|
||||||
|
@ -1583,9 +1613,7 @@ public:
|
||||||
// If we're not expanding the intrinsic then we assume this is cheap
|
// If we're not expanding the intrinsic then we assume this is cheap
|
||||||
// to implement.
|
// to implement.
|
||||||
if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
|
if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
|
||||||
std::pair<InstructionCost, MVT> LT =
|
return getTypeLegalizationCost(RetTy).first;
|
||||||
getTLI()->getTypeLegalizationCost(DL, RetTy);
|
|
||||||
return LT.first;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create the expanded types that will be used to calculate the uadd_sat
|
// Create the expanded types that will be used to calculate the uadd_sat
|
||||||
|
@ -2031,8 +2059,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
const TargetLoweringBase *TLI = getTLI();
|
const TargetLoweringBase *TLI = getTLI();
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
|
||||||
TLI->getTypeLegalizationCost(DL, RetTy);
|
|
||||||
|
|
||||||
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
|
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
|
||||||
if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
|
if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
|
||||||
|
@ -2128,8 +2155,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getNumberOfParts(Type *Tp) {
|
unsigned getNumberOfParts(Type *Tp) {
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
|
||||||
getTLI()->getTypeLegalizationCost(DL, Tp);
|
|
||||||
return LT.first.isValid() ? *LT.first.getValue() : 0;
|
return LT.first.isValid() ? *LT.first.getValue() : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2187,8 +2213,7 @@ public:
|
||||||
unsigned NumReduxLevels = Log2_32(NumVecElts);
|
unsigned NumReduxLevels = Log2_32(NumVecElts);
|
||||||
InstructionCost ArithCost = 0;
|
InstructionCost ArithCost = 0;
|
||||||
InstructionCost ShuffleCost = 0;
|
InstructionCost ShuffleCost = 0;
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
|
||||||
thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
|
|
||||||
unsigned LongVectorCount = 0;
|
unsigned LongVectorCount = 0;
|
||||||
unsigned MVTLen =
|
unsigned MVTLen =
|
||||||
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
|
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
|
||||||
|
@ -2283,8 +2308,7 @@ public:
|
||||||
}
|
}
|
||||||
InstructionCost MinMaxCost = 0;
|
InstructionCost MinMaxCost = 0;
|
||||||
InstructionCost ShuffleCost = 0;
|
InstructionCost ShuffleCost = 0;
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
|
||||||
thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
|
|
||||||
unsigned LongVectorCount = 0;
|
unsigned LongVectorCount = 0;
|
||||||
unsigned MVTLen =
|
unsigned MVTLen =
|
||||||
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
|
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
|
||||||
|
|
|
@ -49,7 +49,6 @@
|
||||||
#include "llvm/Support/AtomicOrdering.h"
|
#include "llvm/Support/AtomicOrdering.h"
|
||||||
#include "llvm/Support/Casting.h"
|
#include "llvm/Support/Casting.h"
|
||||||
#include "llvm/Support/ErrorHandling.h"
|
#include "llvm/Support/ErrorHandling.h"
|
||||||
#include "llvm/Support/InstructionCost.h"
|
|
||||||
#include "llvm/Support/MachineValueType.h"
|
#include "llvm/Support/MachineValueType.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
@ -964,6 +963,22 @@ public:
|
||||||
return ValueTypeActions;
|
return ValueTypeActions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return pair that represents the legalization kind (first) that needs to
|
||||||
|
/// happen to EVT (second) in order to type-legalize it.
|
||||||
|
///
|
||||||
|
/// First: how we should legalize values of this type, either it is already
|
||||||
|
/// legal (return 'Legal') or we need to promote it to a larger type (return
|
||||||
|
/// 'Promote'), or we need to expand it into multiple registers of smaller
|
||||||
|
/// integer type (return 'Expand'). 'Custom' is not an option.
|
||||||
|
///
|
||||||
|
/// Second: for types supported by the target, this is an identity function.
|
||||||
|
/// For types that must be promoted to larger types, this returns the larger
|
||||||
|
/// type to promote to. For integer types that are larger than the largest
|
||||||
|
/// integer register, this contains one step in the expansion to get to the
|
||||||
|
/// smaller register. For illegal floating point types, this returns the
|
||||||
|
/// integer type to transform to.
|
||||||
|
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
|
||||||
|
|
||||||
/// Return how we should legalize values of this type, either it is already
|
/// Return how we should legalize values of this type, either it is already
|
||||||
/// legal (return 'Legal') or we need to promote it to a larger type (return
|
/// legal (return 'Legal') or we need to promote it to a larger type (return
|
||||||
/// 'Promote'), or we need to expand it into multiple registers of smaller
|
/// 'Promote'), or we need to expand it into multiple registers of smaller
|
||||||
|
@ -1905,10 +1920,6 @@ public:
|
||||||
/// Get the ISD node that corresponds to the Instruction class opcode.
|
/// Get the ISD node that corresponds to the Instruction class opcode.
|
||||||
int InstructionOpcodeToISD(unsigned Opcode) const;
|
int InstructionOpcodeToISD(unsigned Opcode) const;
|
||||||
|
|
||||||
/// Estimate the cost of type-legalization and the legalized type.
|
|
||||||
std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL,
|
|
||||||
Type *Ty) const;
|
|
||||||
|
|
||||||
/// @}
|
/// @}
|
||||||
|
|
||||||
//===--------------------------------------------------------------------===//
|
//===--------------------------------------------------------------------===//
|
||||||
|
@ -2535,22 +2546,6 @@ public:
|
||||||
Type *Ty, unsigned AddrSpace,
|
Type *Ty, unsigned AddrSpace,
|
||||||
Instruction *I = nullptr) const;
|
Instruction *I = nullptr) const;
|
||||||
|
|
||||||
/// Return the cost of the scaling factor used in the addressing mode
|
|
||||||
/// represented by AM for this target, for a load/store of the specified type.
|
|
||||||
///
|
|
||||||
/// If the AM is supported, the return value must be >= 0.
|
|
||||||
/// If the AM is not supported, it returns a negative value.
|
|
||||||
/// TODO: Handle pre/postinc as well.
|
|
||||||
/// TODO: Remove default argument
|
|
||||||
virtual InstructionCost getScalingFactorCost(const DataLayout &DL,
|
|
||||||
const AddrMode &AM, Type *Ty,
|
|
||||||
unsigned AS = 0) const {
|
|
||||||
// Default: assume that any scaling factor used in a legal AM is free.
|
|
||||||
if (isLegalAddressingMode(DL, AM, Ty, AS))
|
|
||||||
return 0;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return true if the specified immediate is legal icmp immediate, that is
|
/// Return true if the specified immediate is legal icmp immediate, that is
|
||||||
/// the target has icmp instructions which can compare a register against the
|
/// the target has icmp instructions which can compare a register against the
|
||||||
/// immediate without having to materialize the immediate into a register.
|
/// immediate without having to materialize the immediate into a register.
|
||||||
|
@ -3257,8 +3252,6 @@ private:
|
||||||
ValueTypeActionImpl ValueTypeActions;
|
ValueTypeActionImpl ValueTypeActions;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
|
|
||||||
|
|
||||||
/// Targets can specify ISD nodes that they would like PerformDAGCombine
|
/// Targets can specify ISD nodes that they would like PerformDAGCombine
|
||||||
/// callbacks for by calling setTargetDAGCombine(), which sets a bit in this
|
/// callbacks for by calling setTargetDAGCombine(), which sets a bit in this
|
||||||
/// array.
|
/// array.
|
||||||
|
|
|
@ -1843,41 +1843,6 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
|
||||||
llvm_unreachable("Unknown instruction type encountered!");
|
llvm_unreachable("Unknown instruction type encountered!");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT>
|
|
||||||
TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
|
|
||||||
Type *Ty) const {
|
|
||||||
LLVMContext &C = Ty->getContext();
|
|
||||||
EVT MTy = getValueType(DL, Ty);
|
|
||||||
|
|
||||||
InstructionCost Cost = 1;
|
|
||||||
// We keep legalizing the type until we find a legal kind. We assume that
|
|
||||||
// the only operation that costs anything is the split. After splitting
|
|
||||||
// we need to handle two types.
|
|
||||||
while (true) {
|
|
||||||
LegalizeKind LK = getTypeConversion(C, MTy);
|
|
||||||
|
|
||||||
if (LK.first == TypeScalarizeScalableVector) {
|
|
||||||
// Ensure we return a sensible simple VT here, since many callers of this
|
|
||||||
// function require it.
|
|
||||||
MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64;
|
|
||||||
return std::make_pair(InstructionCost::getInvalid(), VT);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (LK.first == TypeLegal)
|
|
||||||
return std::make_pair(Cost, MTy.getSimpleVT());
|
|
||||||
|
|
||||||
if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
|
|
||||||
Cost *= 2;
|
|
||||||
|
|
||||||
// Do not loop with f128 type.
|
|
||||||
if (MTy == LK.second)
|
|
||||||
return std::make_pair(Cost, MTy.getSimpleVT());
|
|
||||||
|
|
||||||
// Keep legalizing the type.
|
|
||||||
MTy = LK.second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Value *
|
Value *
|
||||||
TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
|
TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
|
||||||
bool UseTLS) const {
|
bool UseTLS) const {
|
||||||
|
|
|
@ -13680,22 +13680,6 @@ bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
InstructionCost AArch64TargetLowering::getScalingFactorCost(
|
|
||||||
const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const {
|
|
||||||
// Scaling factors are not free at all.
|
|
||||||
// Operands | Rt Latency
|
|
||||||
// -------------------------------------------
|
|
||||||
// Rt, [Xn, Xm] | 4
|
|
||||||
// -------------------------------------------
|
|
||||||
// Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
|
|
||||||
// Rt, [Xn, Wm, <extend> #imm] |
|
|
||||||
if (isLegalAddressingMode(DL, AM, Ty, AS))
|
|
||||||
// Scale represents reg2 * scale, thus account for 1 if
|
|
||||||
// it is not equal to 0 or 1.
|
|
||||||
return AM.Scale != 0 && AM.Scale != 1;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
|
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
|
||||||
const MachineFunction &MF, EVT VT) const {
|
const MachineFunction &MF, EVT VT) const {
|
||||||
VT = VT.getScalarType();
|
VT = VT.getScalarType();
|
||||||
|
|
|
@ -634,14 +634,6 @@ public:
|
||||||
unsigned AS,
|
unsigned AS,
|
||||||
Instruction *I = nullptr) const override;
|
Instruction *I = nullptr) const override;
|
||||||
|
|
||||||
/// Return the cost of the scaling factor used in the addressing
|
|
||||||
/// mode represented by AM for this target, for a load/store
|
|
||||||
/// of the specified type.
|
|
||||||
/// If the AM is supported, the return value must be >= 0.
|
|
||||||
/// If the AM is not supported, it returns a negative value.
|
|
||||||
InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
|
|
||||||
Type *Ty, unsigned AS) const override;
|
|
||||||
|
|
||||||
/// Return true if an FMA operation is faster than a pair of fmul and fadd
|
/// Return true if an FMA operation is faster than a pair of fmul and fadd
|
||||||
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
|
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
|
||||||
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
|
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
|
||||||
|
|
|
@ -309,7 +309,7 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
case Intrinsic::smax: {
|
case Intrinsic::smax: {
|
||||||
static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
|
static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
|
||||||
MVT::v8i16, MVT::v2i32, MVT::v4i32};
|
MVT::v8i16, MVT::v2i32, MVT::v4i32};
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
auto LT = getTypeLegalizationCost(RetTy);
|
||||||
// v2i64 types get converted to cmp+bif hence the cost of 2
|
// v2i64 types get converted to cmp+bif hence the cost of 2
|
||||||
if (LT.second == MVT::v2i64)
|
if (LT.second == MVT::v2i64)
|
||||||
return LT.first * 2;
|
return LT.first * 2;
|
||||||
|
@ -324,7 +324,7 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
|
static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
|
||||||
MVT::v8i16, MVT::v2i32, MVT::v4i32,
|
MVT::v8i16, MVT::v2i32, MVT::v4i32,
|
||||||
MVT::v2i64};
|
MVT::v2i64};
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
auto LT = getTypeLegalizationCost(RetTy);
|
||||||
// This is a base cost of 1 for the vadd, plus 3 extract shifts if we
|
// This is a base cost of 1 for the vadd, plus 3 extract shifts if we
|
||||||
// need to extend the type, as it uses shr(qadd(shl, shl)).
|
// need to extend the type, as it uses shr(qadd(shl, shl)).
|
||||||
unsigned Instrs =
|
unsigned Instrs =
|
||||||
|
@ -337,14 +337,14 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
|
static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
|
||||||
MVT::v8i16, MVT::v2i32, MVT::v4i32,
|
MVT::v8i16, MVT::v2i32, MVT::v4i32,
|
||||||
MVT::v2i64};
|
MVT::v2i64};
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
auto LT = getTypeLegalizationCost(RetTy);
|
||||||
if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))
|
if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))
|
||||||
return LT.first;
|
return LT.first;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Intrinsic::experimental_stepvector: {
|
case Intrinsic::experimental_stepvector: {
|
||||||
InstructionCost Cost = 1; // Cost of the `index' instruction
|
InstructionCost Cost = 1; // Cost of the `index' instruction
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
auto LT = getTypeLegalizationCost(RetTy);
|
||||||
// Legalisation of illegal vectors involves an `index' instruction plus
|
// Legalisation of illegal vectors involves an `index' instruction plus
|
||||||
// (LT.first - 1) vector adds.
|
// (LT.first - 1) vector adds.
|
||||||
if (LT.first > 1) {
|
if (LT.first > 1) {
|
||||||
|
@ -368,7 +368,7 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
{Intrinsic::bitreverse, MVT::v1i64, 2},
|
{Intrinsic::bitreverse, MVT::v1i64, 2},
|
||||||
{Intrinsic::bitreverse, MVT::v2i64, 2},
|
{Intrinsic::bitreverse, MVT::v2i64, 2},
|
||||||
};
|
};
|
||||||
const auto LegalisationCost = TLI->getTypeLegalizationCost(DL, RetTy);
|
const auto LegalisationCost = getTypeLegalizationCost(RetTy);
|
||||||
const auto *Entry =
|
const auto *Entry =
|
||||||
CostTableLookup(BitreverseTbl, ICA.getID(), LegalisationCost.second);
|
CostTableLookup(BitreverseTbl, ICA.getID(), LegalisationCost.second);
|
||||||
if (Entry) {
|
if (Entry) {
|
||||||
|
@ -394,7 +394,7 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
{ISD::CTPOP, MVT::v8i8, 1},
|
{ISD::CTPOP, MVT::v8i8, 1},
|
||||||
{ISD::CTPOP, MVT::i32, 5},
|
{ISD::CTPOP, MVT::i32, 5},
|
||||||
};
|
};
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
auto LT = getTypeLegalizationCost(RetTy);
|
||||||
MVT MTy = LT.second;
|
MVT MTy = LT.second;
|
||||||
if (const auto *Entry = CostTableLookup(CtpopCostTbl, ISD::CTPOP, MTy)) {
|
if (const auto *Entry = CostTableLookup(CtpopCostTbl, ISD::CTPOP, MTy)) {
|
||||||
// Extra cost of +1 when illegal vector types are legalized by promoting
|
// Extra cost of +1 when illegal vector types are legalized by promoting
|
||||||
|
@ -451,7 +451,7 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
if (ICA.getArgTypes().empty())
|
if (ICA.getArgTypes().empty())
|
||||||
break;
|
break;
|
||||||
bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
|
bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]);
|
auto LT = getTypeLegalizationCost(ICA.getArgTypes()[0]);
|
||||||
EVT MTy = TLI->getValueType(DL, RetTy);
|
EVT MTy = TLI->getValueType(DL, RetTy);
|
||||||
// Check for the legal types, which are where the size of the input and the
|
// Check for the legal types, which are where the size of the input and the
|
||||||
// output are the same, or we are using cvt f64->i32 or f32->i64.
|
// output are the same, or we are using cvt f64->i32 or f32->i64.
|
||||||
|
@ -1534,7 +1534,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
|
||||||
|
|
||||||
// Legalize the destination type and ensure it can be used in a widening
|
// Legalize the destination type and ensure it can be used in a widening
|
||||||
// operation.
|
// operation.
|
||||||
auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
|
auto DstTyL = getTypeLegalizationCost(DstTy);
|
||||||
unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
|
unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
|
||||||
if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
|
if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
|
||||||
return false;
|
return false;
|
||||||
|
@ -1542,7 +1542,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
|
||||||
// Legalize the source type and ensure it can be used in a widening
|
// Legalize the source type and ensure it can be used in a widening
|
||||||
// operation.
|
// operation.
|
||||||
auto *SrcTy = toVectorTy(Extend->getSrcTy());
|
auto *SrcTy = toVectorTy(Extend->getSrcTy());
|
||||||
auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
|
auto SrcTyL = getTypeLegalizationCost(SrcTy);
|
||||||
unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
|
unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
|
||||||
if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
|
if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
|
||||||
return false;
|
return false;
|
||||||
|
@ -1899,7 +1899,7 @@ InstructionCost AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode,
|
||||||
getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
|
getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
|
||||||
|
|
||||||
// Legalize the types.
|
// Legalize the types.
|
||||||
auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
|
auto VecLT = getTypeLegalizationCost(VecTy);
|
||||||
auto DstVT = TLI->getValueType(DL, Dst);
|
auto DstVT = TLI->getValueType(DL, Dst);
|
||||||
auto SrcVT = TLI->getValueType(DL, Src);
|
auto SrcVT = TLI->getValueType(DL, Src);
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
|
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
|
||||||
|
@ -1954,7 +1954,7 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||||
|
|
||||||
if (Index != -1U) {
|
if (Index != -1U) {
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
|
||||||
|
|
||||||
// This type is legalized to a scalar type.
|
// This type is legalized to a scalar type.
|
||||||
if (!LT.second.isVector())
|
if (!LT.second.isVector())
|
||||||
|
@ -1989,7 +1989,7 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
|
||||||
Opd2PropInfo, Args, CxtI);
|
Opd2PropInfo, Args, CxtI);
|
||||||
|
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||||
|
|
||||||
switch (ISD) {
|
switch (ISD) {
|
||||||
|
@ -2150,7 +2150,7 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||||
MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
|
MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
|
||||||
static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
|
static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
|
||||||
|
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
auto LT = getTypeLegalizationCost(ValTy);
|
||||||
if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
|
if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
|
||||||
(ST->hasFullFP16() &&
|
(ST->hasFullFP16() &&
|
||||||
any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
|
any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
|
||||||
|
@ -2210,7 +2210,7 @@ AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
if (useNeonVector(Src))
|
if (useNeonVector(Src))
|
||||||
return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
||||||
CostKind);
|
CostKind);
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, Src);
|
auto LT = getTypeLegalizationCost(Src);
|
||||||
if (!LT.first.isValid())
|
if (!LT.first.isValid())
|
||||||
return InstructionCost::getInvalid();
|
return InstructionCost::getInvalid();
|
||||||
|
|
||||||
|
@ -2235,7 +2235,7 @@ InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
|
||||||
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
|
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
|
||||||
Alignment, CostKind, I);
|
Alignment, CostKind, I);
|
||||||
auto *VT = cast<VectorType>(DataTy);
|
auto *VT = cast<VectorType>(DataTy);
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, DataTy);
|
auto LT = getTypeLegalizationCost(DataTy);
|
||||||
if (!LT.first.isValid())
|
if (!LT.first.isValid())
|
||||||
return InstructionCost::getInvalid();
|
return InstructionCost::getInvalid();
|
||||||
|
|
||||||
|
@ -2272,7 +2272,7 @@ InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
|
||||||
return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
|
return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
|
||||||
CostKind);
|
CostKind);
|
||||||
|
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, Ty);
|
auto LT = getTypeLegalizationCost(Ty);
|
||||||
if (!LT.first.isValid())
|
if (!LT.first.isValid())
|
||||||
return InstructionCost::getInvalid();
|
return InstructionCost::getInvalid();
|
||||||
|
|
||||||
|
@ -2617,7 +2617,7 @@ InstructionCost
|
||||||
AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
|
AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
|
||||||
bool IsUnsigned,
|
bool IsUnsigned,
|
||||||
TTI::TargetCostKind CostKind) {
|
TTI::TargetCostKind CostKind) {
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
|
|
||||||
if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
|
if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
|
||||||
return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
|
return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
|
||||||
|
@ -2641,7 +2641,7 @@ AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
|
||||||
|
|
||||||
InstructionCost AArch64TTIImpl::getArithmeticReductionCostSVE(
|
InstructionCost AArch64TTIImpl::getArithmeticReductionCostSVE(
|
||||||
unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) {
|
unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) {
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
InstructionCost LegalizationCost = 0;
|
InstructionCost LegalizationCost = 0;
|
||||||
if (LT.first > 1) {
|
if (LT.first > 1) {
|
||||||
Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
|
Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
|
||||||
|
@ -2690,7 +2690,7 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
||||||
if (isa<ScalableVectorType>(ValTy))
|
if (isa<ScalableVectorType>(ValTy))
|
||||||
return getArithmeticReductionCostSVE(Opcode, ValTy, CostKind);
|
return getArithmeticReductionCostSVE(Opcode, ValTy, CostKind);
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
MVT MTy = LT.second;
|
MVT MTy = LT.second;
|
||||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||||
assert(ISD && "Invalid opcode");
|
assert(ISD && "Invalid opcode");
|
||||||
|
@ -2782,7 +2782,7 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) {
|
||||||
{ TTI::SK_Splice, MVT::nxv2f64, 1 },
|
{ TTI::SK_Splice, MVT::nxv2f64, 1 },
|
||||||
};
|
};
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
|
||||||
Type *LegalVTy = EVT(LT.second).getTypeForEVT(Tp->getContext());
|
Type *LegalVTy = EVT(LT.second).getTypeForEVT(Tp->getContext());
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
|
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
|
||||||
EVT PromotedVT = LT.second.getScalarType() == MVT::i1
|
EVT PromotedVT = LT.second.getScalarType() == MVT::i1
|
||||||
|
@ -2819,7 +2819,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
ArrayRef<int> Mask, int Index,
|
ArrayRef<int> Mask, int Index,
|
||||||
VectorType *SubTp,
|
VectorType *SubTp,
|
||||||
ArrayRef<const Value *> Args) {
|
ArrayRef<const Value *> Args) {
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
|
||||||
// If we have a Mask, and the LT is being legalized somehow, split the Mask
|
// If we have a Mask, and the LT is being legalized somehow, split the Mask
|
||||||
// into smaller vectors and sum the cost of each shuffle.
|
// into smaller vectors and sum the cost of each shuffle.
|
||||||
if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
|
if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
|
||||||
|
@ -3016,8 +3016,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
// move, so long as the inserted vector is "aligned".
|
// move, so long as the inserted vector is "aligned".
|
||||||
if (Kind == TTI::SK_InsertSubvector && LT.second.isFixedLengthVector() &&
|
if (Kind == TTI::SK_InsertSubvector && LT.second.isFixedLengthVector() &&
|
||||||
LT.second.getSizeInBits() <= 128 && SubTp) {
|
LT.second.getSizeInBits() <= 128 && SubTp) {
|
||||||
std::pair<InstructionCost, MVT> SubLT =
|
std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
|
||||||
TLI->getTypeLegalizationCost(DL, SubTp);
|
|
||||||
if (SubLT.second.isVector()) {
|
if (SubLT.second.isVector()) {
|
||||||
int NumElts = LT.second.getVectorNumElements();
|
int NumElts = LT.second.getVectorNumElements();
|
||||||
int NumSubElts = SubLT.second.getVectorNumElements();
|
int NumSubElts = SubLT.second.getVectorNumElements();
|
||||||
|
@ -3052,3 +3051,26 @@ bool AArch64TTIImpl::preferPredicateOverEpilogue(
|
||||||
|
|
||||||
return (TailFoldingKindLoc & Required) == Required;
|
return (TailFoldingKindLoc & Required) == Required;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Cost of the scaling factor used by the addressing mode described by
/// \p BaseGV, \p BaseOffset, \p HasBaseReg and \p Scale for an access of
/// type \p Ty in address space \p AddrSpace.
///
/// \returns a negative cost when the addressing mode is not legal; otherwise
/// 1 when \p Scale is neither 0 nor 1, and 0 when it is.
InstructionCost
AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale, unsigned AddrSpace) const {
  // Scaling factors are not free at all.
  //
  //   Operands                     | Rt Latency
  //   -------------------------------------------
  //   Rt, [Xn, Xm]                 | 4
  //   -------------------------------------------
  //   Rt, [Xn, Xm, lsl #imm]       | Rn: 4 Rm: 5
  //   Rt, [Xn, Wm, <extend> #imm]  |
  TargetLoweringBase::AddrMode Mode;
  Mode.Scale = Scale;
  Mode.HasBaseReg = HasBaseReg;
  Mode.BaseOffs = BaseOffset;
  Mode.BaseGV = BaseGV;

  // An addressing mode the target rejects is reported as a negative cost.
  if (!getTLI()->isLegalAddressingMode(DL, Mode, Ty, AddrSpace))
    return -1;

  // Scale represents reg2 * scale, so account for 1 only when it is neither
  // 0 nor 1.
  const bool IsScaledIndex = Mode.Scale != 0 && Mode.Scale != 1;
  return IsScaledIndex;
}
|
||||||
|
|
|
@ -371,6 +371,15 @@ public:
|
||||||
ArrayRef<int> Mask, int Index,
|
ArrayRef<int> Mask, int Index,
|
||||||
VectorType *SubTp,
|
VectorType *SubTp,
|
||||||
ArrayRef<const Value *> Args = None);
|
ArrayRef<const Value *> Args = None);
|
||||||
|
|
||||||
|
/// Return the cost of the scaling factor used in the addressing mode
|
||||||
|
/// described by BaseGV, BaseOffset, HasBaseReg and Scale for this target,
|
||||||
|
/// for a load/store of type Ty in address space AddrSpace.
|
||||||
|
/// If the addressing mode is supported, the return value must be >= 0.
|
||||||
|
/// If the addressing mode is not supported, it returns a negative value.
|
||||||
|
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
|
||||||
|
int64_t BaseOffset, bool HasBaseReg,
|
||||||
|
int64_t Scale, unsigned AddrSpace) const;
|
||||||
/// @}
|
/// @}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -518,7 +518,7 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
|
||||||
const Instruction *CxtI) {
|
const Instruction *CxtI) {
|
||||||
|
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||||
|
|
||||||
// Because we don't have any legal vector operations, but the legal types, we
|
// Because we don't have any legal vector operations, but the legal types, we
|
||||||
|
@ -690,7 +690,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
Type *RetTy = ICA.getReturnType();
|
Type *RetTy = ICA.getReturnType();
|
||||||
|
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
|
||||||
|
|
||||||
unsigned NElts = LT.second.isVector() ?
|
unsigned NElts = LT.second.isVector() ?
|
||||||
LT.second.getVectorNumElements() : 1;
|
LT.second.getVectorNumElements() : 1;
|
||||||
|
@ -769,7 +769,7 @@ GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
|
||||||
if (!ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16)
|
if (!ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16)
|
||||||
return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
|
return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
return LT.first * getFullRateInstrCost();
|
return LT.first * getFullRateInstrCost();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -784,7 +784,7 @@ GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
|
||||||
if (!ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16)
|
if (!ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16)
|
||||||
return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
|
return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
return LT.first * getHalfRateInstrCost(CostKind);
|
return LT.first * getHalfRateInstrCost(CostKind);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1148,3 +1148,17 @@ int GCNTTIImpl::get64BitInstrCost(TTI::TargetCostKind CostKind) const {
|
||||||
: ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind)
|
: ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind)
|
||||||
: getQuarterRateInstrCost(CostKind);
|
: getQuarterRateInstrCost(CostKind);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::pair<InstructionCost, MVT>
|
||||||
|
GCNTTIImpl::getTypeLegalizationCost(Type *Ty) const {
|
||||||
|
std::pair<InstructionCost, MVT> Cost = BaseT::getTypeLegalizationCost(Ty);
|
||||||
|
auto Size = DL.getTypeSizeInBits(Ty);
|
||||||
|
// Maximum load or store can handle 8 dwords for scalar and 4 for
|
||||||
|
// vector ALU. Let's assume anything above 8 dwords is expensive
|
||||||
|
// even if legal.
|
||||||
|
if (Size <= 256)
|
||||||
|
return Cost;
|
||||||
|
|
||||||
|
Cost.first += (Size + 255) / 256;
|
||||||
|
return Cost;
|
||||||
|
}
|
||||||
|
|
|
@ -94,6 +94,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
|
||||||
// quarter. This also applies to some integer operations.
|
// quarter. This also applies to some integer operations.
|
||||||
int get64BitInstrCost(TTI::TargetCostKind CostKind) const;
|
int get64BitInstrCost(TTI::TargetCostKind CostKind) const;
|
||||||
|
|
||||||
|
// Type-legalization cost and legalized type for Ty: the base-class estimate, with the cost increased for types wider than 256 bits.
std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
|
explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
|
||||||
|
|
||||||
|
|
|
@ -12947,22 +12947,6 @@ bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
|
||||||
return hasCFUser(V, Visited, Subtarget->getWavefrontSize());
|
return hasCFUser(V, Visited, Subtarget->getWavefrontSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT>
|
|
||||||
SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
|
|
||||||
Type *Ty) const {
|
|
||||||
std::pair<InstructionCost, MVT> Cost =
|
|
||||||
TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
|
|
||||||
auto Size = DL.getTypeSizeInBits(Ty);
|
|
||||||
// Maximum load or store can handle 8 dwords for scalar and 4 for
|
|
||||||
// vector ALU. Let's assume anything above 8 dwords is expensive
|
|
||||||
// even if legal.
|
|
||||||
if (Size <= 256)
|
|
||||||
return Cost;
|
|
||||||
|
|
||||||
Cost.first += (Size + 255) / 256;
|
|
||||||
return Cost;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool SITargetLowering::hasMemSDNodeUser(SDNode *N) const {
|
bool SITargetLowering::hasMemSDNodeUser(SDNode *N) const {
|
||||||
SDNode::use_iterator I = N->use_begin(), E = N->use_end();
|
SDNode::use_iterator I = N->use_begin(), E = N->use_end();
|
||||||
for (; I != E; ++I) {
|
for (; I != E; ++I) {
|
||||||
|
|
|
@ -525,9 +525,6 @@ public:
|
||||||
const SIRegisterInfo &TRI,
|
const SIRegisterInfo &TRI,
|
||||||
SIMachineFunctionInfo &Info) const;
|
SIMachineFunctionInfo &Info) const;
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL,
|
|
||||||
Type *Ty) const;
|
|
||||||
|
|
||||||
MachineMemOperand::Flags
|
MachineMemOperand::Flags
|
||||||
getTargetMMOFlags(const Instruction &I) const override;
|
getTargetMMOFlags(const Instruction &I) const override;
|
||||||
};
|
};
|
||||||
|
|
|
@ -19147,18 +19147,6 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Cost of the scaling used by addressing mode \p AM for an access of type
/// \p Ty in address space \p AS.
///
/// \returns a negative cost when \p AM is not legal; otherwise 1 when the
/// subtarget has FPAO and the scale is negative, and 0 in all other cases.
InstructionCost ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
                                                        const AddrMode &AM,
                                                        Type *Ty,
                                                        unsigned AS) const {
  // An addressing mode the target rejects is reported as a negative cost.
  if (!isLegalAddressingMode(DL, AM, Ty, AS))
    return -1;

  // With FPAO, positive offsets execute faster, so only a negative scale is
  // charged.
  const bool PenalizeNegativeScale = Subtarget->hasFPAO() && AM.Scale < 0;
  return PenalizeNegativeScale ? 1 : 0;
}
|
|
||||||
|
|
||||||
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
|
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
|
||||||
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
|
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
|
||||||
/// expanded to FMAs when this method returns true, otherwise fmuladd is
|
/// expanded to FMAs when this method returns true, otherwise fmuladd is
|
||||||
|
|
|
@ -470,14 +470,6 @@ class VectorType;
|
||||||
Type *Ty, unsigned AS,
|
Type *Ty, unsigned AS,
|
||||||
Instruction *I = nullptr) const override;
|
Instruction *I = nullptr) const override;
|
||||||
|
|
||||||
/// getScalingFactorCost - Return the cost of the scaling used in
|
|
||||||
/// addressing mode represented by AM.
|
|
||||||
/// If the AM is supported, the return value must be >= 0.
|
|
||||||
/// If the AM is not supported, the return value must be negative.
|
|
||||||
InstructionCost getScalingFactorCost(const DataLayout &DL,
|
|
||||||
const AddrMode &AM, Type *Ty,
|
|
||||||
unsigned AS) const override;
|
|
||||||
|
|
||||||
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
|
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
|
||||||
|
|
||||||
/// Returns true if the addressing mode representing by AM is legal
|
/// Returns true if the addressing mode representing by AM is legal
|
||||||
|
|
|
@ -634,7 +634,7 @@ InstructionCost ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||||
{ISD::FP_EXTEND, MVT::v2f32, 2},
|
{ISD::FP_EXTEND, MVT::v2f32, 2},
|
||||||
{ISD::FP_EXTEND, MVT::v4f32, 4}};
|
{ISD::FP_EXTEND, MVT::v4f32, 4}};
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
|
||||||
if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
|
if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
|
||||||
return AdjustCost(LT.first * Entry->Cost);
|
return AdjustCost(LT.first * Entry->Cost);
|
||||||
}
|
}
|
||||||
|
@ -901,7 +901,7 @@ InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
|
||||||
// sometimes just be vmovs. Integers involve being passed to GPR registers,
|
// sometimes just be vmovs. Integers involve being passed to GPR registers,
|
||||||
// causing more of a delay.
|
// causing more of a delay.
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT =
|
||||||
getTLI()->getTypeLegalizationCost(DL, ValTy->getScalarType());
|
getTypeLegalizationCost(ValTy->getScalarType());
|
||||||
return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
|
return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -926,7 +926,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||||
// - may require one or more conditional mov (including an IT),
|
// - may require one or more conditional mov (including an IT),
|
||||||
// - can't operate directly on immediates,
|
// - can't operate directly on immediates,
|
||||||
// - require live flags, which we can't copy around easily.
|
// - require live flags, which we can't copy around easily.
|
||||||
InstructionCost Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;
|
InstructionCost Cost = getTypeLegalizationCost(ValTy).first;
|
||||||
|
|
||||||
// Possible IT instruction for Thumb2, or more for Thumb1.
|
// Possible IT instruction for Thumb2, or more for Thumb1.
|
||||||
++Cost;
|
++Cost;
|
||||||
|
@ -1003,8 +1003,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||||
return Entry->Cost;
|
return Entry->Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
TLI->getTypeLegalizationCost(DL, ValTy);
|
|
||||||
return LT.first;
|
return LT.first;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1028,8 +1027,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||||
I);
|
I);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
TLI->getTypeLegalizationCost(DL, ValTy);
|
|
||||||
int BaseCost = ST->getMVEVectorCostFactor(CostKind);
|
int BaseCost = ST->getMVEVectorCostFactor(CostKind);
|
||||||
// There are two types - the input that specifies the type of the compare
|
// There are two types - the input that specifies the type of the compare
|
||||||
// and the output vXi1 type. Because we don't know how the output will be
|
// and the output vXi1 type. Because we don't know how the output will be
|
||||||
|
@ -1222,7 +1220,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
|
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
|
||||||
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
|
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
|
||||||
if (const auto *Entry =
|
if (const auto *Entry =
|
||||||
CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second))
|
CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second))
|
||||||
return LT.first * Entry->Cost;
|
return LT.first * Entry->Cost;
|
||||||
|
@ -1243,7 +1241,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
|
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
|
||||||
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
|
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
|
||||||
if (const auto *Entry =
|
if (const auto *Entry =
|
||||||
CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
|
CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
|
||||||
return LT.first * Entry->Cost;
|
return LT.first * Entry->Cost;
|
||||||
|
@ -1267,7 +1265,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
|
|
||||||
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
|
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
|
||||||
if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
|
if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
|
||||||
ISD::VECTOR_SHUFFLE, LT.second))
|
ISD::VECTOR_SHUFFLE, LT.second))
|
||||||
return LT.first * Entry->Cost;
|
return LT.first * Entry->Cost;
|
||||||
|
@ -1283,7 +1281,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
|
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
|
||||||
{ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}};
|
{ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}};
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
|
||||||
if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE,
|
if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE,
|
||||||
LT.second))
|
LT.second))
|
||||||
return LT.first * Entry->Cost *
|
return LT.first * Entry->Cost *
|
||||||
|
@ -1291,7 +1289,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Mask.empty()) {
|
if (!Mask.empty()) {
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
|
||||||
if (LT.second.isVector() &&
|
if (LT.second.isVector() &&
|
||||||
Mask.size() <= LT.second.getVectorNumElements() &&
|
Mask.size() <= LT.second.getVectorNumElements() &&
|
||||||
(isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
|
(isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
|
||||||
|
@ -1328,7 +1326,7 @@ InstructionCost ARMTTIImpl::getArithmeticInstrCost(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
|
|
||||||
if (ST->hasNEON()) {
|
if (ST->hasNEON()) {
|
||||||
const unsigned FunctionCallDivCost = 20;
|
const unsigned FunctionCallDivCost = 20;
|
||||||
|
@ -1467,7 +1465,7 @@ InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
|
cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
|
||||||
// Unaligned loads/stores are extremely inefficient.
|
// Unaligned loads/stores are extremely inefficient.
|
||||||
// We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
|
// We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
|
||||||
return LT.first * 4;
|
return LT.first * 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1568,7 +1566,7 @@ InstructionCost ARMTTIImpl::getGatherScatterOpCost(
|
||||||
|
|
||||||
unsigned NumElems = VTy->getNumElements();
|
unsigned NumElems = VTy->getNumElements();
|
||||||
unsigned EltSize = VTy->getScalarSizeInBits();
|
unsigned EltSize = VTy->getScalarSizeInBits();
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(DataTy);
|
||||||
|
|
||||||
// For now, it is assumed that for the MVE gather instructions the loads are
|
// For now, it is assumed that for the MVE gather instructions the loads are
|
||||||
// all effectively serialised. This means the cost is the scalar cost
|
// all effectively serialised. This means the cost is the scalar cost
|
||||||
|
@ -1664,7 +1662,7 @@ ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
||||||
if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD)
|
if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD)
|
||||||
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
|
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
|
|
||||||
static const CostTblEntry CostTblAdd[]{
|
static const CostTblEntry CostTblAdd[]{
|
||||||
{ISD::ADD, MVT::v16i8, 1},
|
{ISD::ADD, MVT::v16i8, 1},
|
||||||
|
@ -1688,8 +1686,7 @@ InstructionCost ARMTTIImpl::getExtendedReductionCost(
|
||||||
switch (ISD) {
|
switch (ISD) {
|
||||||
case ISD::ADD:
|
case ISD::ADD:
|
||||||
if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
|
if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
TLI->getTypeLegalizationCost(DL, ValTy);
|
|
||||||
|
|
||||||
// The legal cases are:
|
// The legal cases are:
|
||||||
// VADDV u/s 8/16/32
|
// VADDV u/s 8/16/32
|
||||||
|
@ -1720,8 +1717,7 @@ ARMTTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
|
||||||
EVT ResVT = TLI->getValueType(DL, ResTy);
|
EVT ResVT = TLI->getValueType(DL, ResTy);
|
||||||
|
|
||||||
if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
|
if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
TLI->getTypeLegalizationCost(DL, ValTy);
|
|
||||||
|
|
||||||
// The legal cases are:
|
// The legal cases are:
|
||||||
// VMLAV u/s 8/16/32
|
// VMLAV u/s 8/16/32
|
||||||
|
@ -1763,7 +1759,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
break;
|
break;
|
||||||
Type *VT = ICA.getReturnType();
|
Type *VT = ICA.getReturnType();
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VT);
|
||||||
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
|
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
|
||||||
LT.second == MVT::v16i8) {
|
LT.second == MVT::v16i8) {
|
||||||
// This is a base cost of 1 for the vqadd, plus 3 extract shifts if we
|
// This is a base cost of 1 for the vqadd, plus 3 extract shifts if we
|
||||||
|
@ -1783,7 +1779,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
break;
|
break;
|
||||||
Type *VT = ICA.getReturnType();
|
Type *VT = ICA.getReturnType();
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VT);
|
||||||
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
|
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
|
||||||
LT.second == MVT::v16i8)
|
LT.second == MVT::v16i8)
|
||||||
return LT.first * ST->getMVEVectorCostFactor(CostKind);
|
return LT.first * ST->getMVEVectorCostFactor(CostKind);
|
||||||
|
@ -1794,7 +1790,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
if (!ST->hasMVEFloatOps())
|
if (!ST->hasMVEFloatOps())
|
||||||
break;
|
break;
|
||||||
Type *VT = ICA.getReturnType();
|
Type *VT = ICA.getReturnType();
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VT);
|
||||||
if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
|
if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
|
||||||
return LT.first * ST->getMVEVectorCostFactor(CostKind);
|
return LT.first * ST->getMVEVectorCostFactor(CostKind);
|
||||||
break;
|
break;
|
||||||
|
@ -1804,7 +1800,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
if (ICA.getArgTypes().empty())
|
if (ICA.getArgTypes().empty())
|
||||||
break;
|
break;
|
||||||
bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
|
bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]);
|
auto LT = getTypeLegalizationCost(ICA.getArgTypes()[0]);
|
||||||
EVT MTy = TLI->getValueType(DL, ICA.getReturnType());
|
EVT MTy = TLI->getValueType(DL, ICA.getReturnType());
|
||||||
// Check for the legal types, with the correct subtarget features.
|
// Check for the legal types, with the correct subtarget features.
|
||||||
if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
|
if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
|
||||||
|
@ -2416,3 +2412,20 @@ bool ARMTTIImpl::preferPredicatedReductionSelect(
|
||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Cost of the scaling used by the addressing mode described by \p BaseGV,
/// \p BaseOffset, \p HasBaseReg and \p Scale for an access of type \p Ty in
/// address space \p AddrSpace.
///
/// \returns a negative cost when the addressing mode is not legal; otherwise
/// 1 when the subtarget has FPAO and \p Scale is negative, and 0 in all
/// other cases.
InstructionCost ARMTTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                                 int64_t BaseOffset,
                                                 bool HasBaseReg, int64_t Scale,
                                                 unsigned AddrSpace) const {
  TargetLoweringBase::AddrMode Mode;
  Mode.Scale = Scale;
  Mode.HasBaseReg = HasBaseReg;
  Mode.BaseOffs = BaseOffset;
  Mode.BaseGV = BaseGV;

  // An addressing mode the target rejects is reported as a negative cost.
  if (!getTLI()->isLegalAddressingMode(DL, Mode, Ty, AddrSpace))
    return -1;

  // With FPAO, positive offsets execute faster, so only a negative scale is
  // charged.
  const bool PenalizeNegativeScale = ST->hasFPAO() && Mode.Scale < 0;
  return PenalizeNegativeScale ? 1 : 0;
}
|
||||||
|
|
|
@ -287,6 +287,14 @@ public:
|
||||||
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
TTI::TargetCostKind CostKind);
|
TTI::TargetCostKind CostKind);
|
||||||
|
|
||||||
|
/// getScalingFactorCost - Return the cost of the scaling used in the
|
||||||
|
/// addressing mode described by BaseGV, BaseOffset, HasBaseReg and Scale.
|
||||||
|
/// If the addressing mode is supported, the return value must be >= 0.
|
||||||
|
/// If the addressing mode is not supported, the return value must be negative.
|
||||||
|
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
|
||||||
|
int64_t BaseOffset, bool HasBaseReg,
|
||||||
|
int64_t Scale, unsigned AddrSpace) const;
|
||||||
|
|
||||||
bool maybeLoweredToCall(Instruction &I);
|
bool maybeLoweredToCall(Instruction &I);
|
||||||
bool isLoweredToCall(const Function *F);
|
bool isLoweredToCall(const Function *F);
|
||||||
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
||||||
|
|
|
@ -145,7 +145,7 @@ HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
TTI::TargetCostKind CostKind) {
|
TTI::TargetCostKind CostKind) {
|
||||||
if (ICA.getID() == Intrinsic::bswap) {
|
if (ICA.getID() == Intrinsic::bswap) {
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT =
|
||||||
TLI.getTypeLegalizationCost(DL, ICA.getReturnType());
|
getTypeLegalizationCost(ICA.getReturnType());
|
||||||
return LT.first + 2;
|
return LT.first + 2;
|
||||||
}
|
}
|
||||||
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
|
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
|
||||||
|
@ -254,7 +254,7 @@ InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||||
TTI::TargetCostKind CostKind,
|
TTI::TargetCostKind CostKind,
|
||||||
const Instruction *I) {
|
const Instruction *I) {
|
||||||
if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
|
if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
|
||||||
std::pair<InstructionCost, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
if (Opcode == Instruction::FCmp)
|
if (Opcode == Instruction::FCmp)
|
||||||
return LT.first + FloatFactor * getTypeNumElements(ValTy);
|
return LT.first + FloatFactor * getTypeNumElements(ValTy);
|
||||||
}
|
}
|
||||||
|
@ -274,7 +274,7 @@ InstructionCost HexagonTTIImpl::getArithmeticInstrCost(
|
||||||
Opd2PropInfo, Args, CxtI);
|
Opd2PropInfo, Args, CxtI);
|
||||||
|
|
||||||
if (Ty->isVectorTy()) {
|
if (Ty->isVectorTy()) {
|
||||||
std::pair<InstructionCost, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
if (LT.second.isFloatingPoint())
|
if (LT.second.isFloatingPoint())
|
||||||
return LT.first + FloatFactor * getTypeNumElements(Ty);
|
return LT.first + FloatFactor * getTypeNumElements(Ty);
|
||||||
}
|
}
|
||||||
|
@ -291,10 +291,8 @@ InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
|
||||||
unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
|
unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
|
||||||
unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
|
unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> SrcLT =
|
std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcTy);
|
||||||
TLI.getTypeLegalizationCost(DL, SrcTy);
|
std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(DstTy);
|
||||||
std::pair<InstructionCost, MVT> DstLT =
|
|
||||||
TLI.getTypeLegalizationCost(DL, DstTy);
|
|
||||||
InstructionCost Cost =
|
InstructionCost Cost =
|
||||||
std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
|
std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
|
||||||
// TODO: Allow non-throughput costs that aren't binary.
|
// TODO: Allow non-throughput costs that aren't binary.
|
||||||
|
|
|
@ -428,7 +428,7 @@ InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
|
||||||
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
|
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
|
||||||
const Instruction *CxtI) {
|
const Instruction *CxtI) {
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
|
|
||||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||||
|
|
||||||
|
|
|
@ -331,8 +331,7 @@ InstructionCost PPCTTIImpl::getUserCost(const User *U,
|
||||||
|
|
||||||
if (U->getType()->isVectorTy()) {
|
if (U->getType()->isVectorTy()) {
|
||||||
// Instructions that need to be split should cost more.
|
// Instructions that need to be split should cost more.
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(U->getType());
|
||||||
TLI->getTypeLegalizationCost(DL, U->getType());
|
|
||||||
return LT.first * BaseT::getUserCost(U, Operands, CostKind);
|
return LT.first * BaseT::getUserCost(U, Operands, CostKind);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -960,7 +959,7 @@ InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode,
|
||||||
if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
|
if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
|
||||||
return InstructionCost(1);
|
return InstructionCost(1);
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
|
std::pair<InstructionCost, MVT> LT1 = getTypeLegalizationCost(Ty1);
|
||||||
// If type legalization involves splitting the vector, we don't want to
|
// If type legalization involves splitting the vector, we don't want to
|
||||||
// double the cost at every step - only the last step.
|
// double the cost at every step - only the last step.
|
||||||
if (LT1.first != 1 || !LT1.second.isVector())
|
if (LT1.first != 1 || !LT1.second.isVector())
|
||||||
|
@ -971,7 +970,7 @@ InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode,
|
||||||
return InstructionCost(1);
|
return InstructionCost(1);
|
||||||
|
|
||||||
if (Ty2) {
|
if (Ty2) {
|
||||||
std::pair<InstructionCost, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
|
std::pair<InstructionCost, MVT> LT2 = getTypeLegalizationCost(Ty2);
|
||||||
if (LT2.first != 1 || !LT2.second.isVector())
|
if (LT2.first != 1 || !LT2.second.isVector())
|
||||||
return InstructionCost(1);
|
return InstructionCost(1);
|
||||||
}
|
}
|
||||||
|
@ -1014,7 +1013,7 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
|
||||||
return InstructionCost::getMax();
|
return InstructionCost::getMax();
|
||||||
|
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
|
||||||
|
|
||||||
// PPC, for both Altivec/VSX, support cheap arbitrary permutations
|
// PPC, for both Altivec/VSX, support cheap arbitrary permutations
|
||||||
// (at least in the sense that there need only be one non-loop-invariant
|
// (at least in the sense that there need only be one non-loop-invariant
|
||||||
|
@ -1156,7 +1155,7 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
||||||
CostKind);
|
CostKind);
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
|
||||||
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
|
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
|
||||||
"Invalid Opcode");
|
"Invalid Opcode");
|
||||||
|
|
||||||
|
@ -1246,7 +1245,7 @@ InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
|
||||||
"Expect a vector type for interleaved memory op");
|
"Expect a vector type for interleaved memory op");
|
||||||
|
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VecTy);
|
||||||
|
|
||||||
// Firstly, the cost of load/store operation.
|
// Firstly, the cost of load/store operation.
|
||||||
InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
|
InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
|
||||||
|
@ -1427,8 +1426,7 @@ InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
assert(SrcVTy && "Expected a vector type for VP memory operations");
|
assert(SrcVTy && "Expected a vector type for VP memory operations");
|
||||||
|
|
||||||
if (hasActiveVectorLength(Opcode, Src, Alignment)) {
|
if (hasActiveVectorLength(Opcode, Src, Alignment)) {
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcVTy);
|
||||||
TLI->getTypeLegalizationCost(DL, SrcVTy);
|
|
||||||
|
|
||||||
InstructionCost CostFactor =
|
InstructionCost CostFactor =
|
||||||
vectorCostAdjustmentFactor(Opcode, Src, nullptr);
|
vectorCostAdjustmentFactor(Opcode, Src, nullptr);
|
||||||
|
|
|
@ -168,7 +168,7 @@ RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index) {
|
InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index) {
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
|
||||||
|
|
||||||
unsigned Cost = 2; // vslidedown+vslideup.
|
unsigned Cost = 2; // vslidedown+vslideup.
|
||||||
// TODO: LMUL should increase cost.
|
// TODO: LMUL should increase cost.
|
||||||
|
@ -182,7 +182,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
int Index, VectorType *SubTp,
|
int Index, VectorType *SubTp,
|
||||||
ArrayRef<const Value *> Args) {
|
ArrayRef<const Value *> Args) {
|
||||||
if (isa<ScalableVectorType>(Tp)) {
|
if (isa<ScalableVectorType>(Tp)) {
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
|
||||||
switch (Kind) {
|
switch (Kind) {
|
||||||
default:
|
default:
|
||||||
// Fallthrough to generic handling.
|
// Fallthrough to generic handling.
|
||||||
|
@ -257,7 +257,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
// TODO: add more intrinsic
|
// TODO: add more intrinsic
|
||||||
case Intrinsic::experimental_stepvector: {
|
case Intrinsic::experimental_stepvector: {
|
||||||
unsigned Cost = 1; // vid
|
unsigned Cost = 1; // vid
|
||||||
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
auto LT = getTypeLegalizationCost(RetTy);
|
||||||
return Cost + (LT.first - 1);
|
return Cost + (LT.first - 1);
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
@ -364,7 +364,7 @@ RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
|
||||||
if (Ty->getScalarSizeInBits() > ST->getELEN())
|
if (Ty->getScalarSizeInBits() > ST->getELEN())
|
||||||
return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
|
return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
if (Ty->getElementType()->isIntegerTy(1))
|
if (Ty->getElementType()->isIntegerTy(1))
|
||||||
// vcpop sequences, see vreduction-mask.ll. umax, smin actually only
|
// vcpop sequences, see vreduction-mask.ll. umax, smin actually only
|
||||||
// cost 2, but we don't have enough info here so we slightly over cost.
|
// cost 2, but we don't have enough info here so we slightly over cost.
|
||||||
|
@ -394,7 +394,7 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
|
||||||
ISD != ISD::FADD)
|
ISD != ISD::FADD)
|
||||||
return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
|
return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
if (Ty->getElementType()->isIntegerTy(1))
|
if (Ty->getElementType()->isIntegerTy(1))
|
||||||
// vcpop sequences, see vreduction-mask.ll
|
// vcpop sequences, see vreduction-mask.ll
|
||||||
return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
|
return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
|
||||||
|
@ -423,7 +423,7 @@ InstructionCost RISCVTTIImpl::getExtendedReductionCost(
|
||||||
return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
|
return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
|
||||||
FMF, CostKind);
|
FMF, CostKind);
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
|
|
||||||
if (ResTy->getScalarSizeInBits() != 2 * LT.second.getScalarSizeInBits())
|
if (ResTy->getScalarSizeInBits() != 2 * LT.second.getScalarSizeInBits())
|
||||||
return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
|
return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
|
||||||
|
|
|
@ -56478,35 +56478,6 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||||
return Res;
|
return Res;
|
||||||
}
|
}
|
||||||
|
|
||||||
InstructionCost X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
|
|
||||||
const AddrMode &AM,
|
|
||||||
Type *Ty,
|
|
||||||
unsigned AS) const {
|
|
||||||
// Scaling factors are not free at all.
|
|
||||||
// An indexed folded instruction, i.e., inst (reg1, reg2, scale),
|
|
||||||
// will take 2 allocations in the out of order engine instead of 1
|
|
||||||
// for plain addressing mode, i.e. inst (reg1).
|
|
||||||
// E.g.,
|
|
||||||
// vaddps (%rsi,%rdx), %ymm0, %ymm1
|
|
||||||
// Requires two allocations (one for the load, one for the computation)
|
|
||||||
// whereas:
|
|
||||||
// vaddps (%rsi), %ymm0, %ymm1
|
|
||||||
// Requires just 1 allocation, i.e., freeing allocations for other operations
|
|
||||||
// and having less micro operations to execute.
|
|
||||||
//
|
|
||||||
// For some X86 architectures, this is even worse because for instance for
|
|
||||||
// stores, the complex addressing mode forces the instruction to use the
|
|
||||||
// "load" ports instead of the dedicated "store" port.
|
|
||||||
// E.g., on Haswell:
|
|
||||||
// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
|
|
||||||
// vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
|
|
||||||
if (isLegalAddressingMode(DL, AM, Ty, AS))
|
|
||||||
// Scale represents reg2 * scale, thus account for 1
|
|
||||||
// as soon as we use a second register.
|
|
||||||
return AM.Scale != 0;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
|
bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
|
||||||
// Integer division on x86 is expensive. However, when aggressively optimizing
|
// Integer division on x86 is expensive. However, when aggressively optimizing
|
||||||
// for code size, we prefer to use a div instruction, as it is usually smaller
|
// for code size, we prefer to use a div instruction, as it is usually smaller
|
||||||
|
|
|
@ -1240,15 +1240,6 @@ namespace llvm {
|
||||||
|
|
||||||
bool isLegalStoreImmediate(int64_t Imm) const override;
|
bool isLegalStoreImmediate(int64_t Imm) const override;
|
||||||
|
|
||||||
/// Return the cost of the scaling factor used in the addressing
|
|
||||||
/// mode represented by AM for this target, for a load/store
|
|
||||||
/// of the specified type.
|
|
||||||
/// If the AM is supported, the return value must be >= 0.
|
|
||||||
/// If the AM is not supported, it returns a negative value.
|
|
||||||
InstructionCost getScalingFactorCost(const DataLayout &DL,
|
|
||||||
const AddrMode &AM, Type *Ty,
|
|
||||||
unsigned AS) const override;
|
|
||||||
|
|
||||||
/// This is used to enable splatted operand transforms for vector shifts
|
/// This is used to enable splatted operand transforms for vector shifts
|
||||||
/// and vector funnel shifts.
|
/// and vector funnel shifts.
|
||||||
bool isVectorShiftByScalarCheap(Type *Ty) const override;
|
bool isVectorShiftByScalarCheap(Type *Ty) const override;
|
||||||
|
|
|
@ -202,7 +202,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
|
|
||||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||||
assert(ISD && "Invalid opcode");
|
assert(ISD && "Invalid opcode");
|
||||||
|
@ -1089,7 +1089,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
ArrayRef<const Value *> Args) {
|
ArrayRef<const Value *> Args) {
|
||||||
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
|
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
|
||||||
// 64-bit packed integer vectors (v2i32) are widened to type v4i32.
|
// 64-bit packed integer vectors (v2i32) are widened to type v4i32.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(BaseTp);
|
||||||
|
|
||||||
Kind = improveShuffleKindFromMask(Kind, Mask);
|
Kind = improveShuffleKindFromMask(Kind, Mask);
|
||||||
// Treat Transpose as 2-op shuffles - there's no difference in lowering.
|
// Treat Transpose as 2-op shuffles - there's no difference in lowering.
|
||||||
|
@ -1108,8 +1108,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
int NumElts = LT.second.getVectorNumElements();
|
int NumElts = LT.second.getVectorNumElements();
|
||||||
if ((Index % NumElts) == 0)
|
if ((Index % NumElts) == 0)
|
||||||
return 0;
|
return 0;
|
||||||
std::pair<InstructionCost, MVT> SubLT =
|
std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
|
||||||
TLI->getTypeLegalizationCost(DL, SubTp);
|
|
||||||
if (SubLT.second.isVector()) {
|
if (SubLT.second.isVector()) {
|
||||||
int NumSubElts = SubLT.second.getVectorNumElements();
|
int NumSubElts = SubLT.second.getVectorNumElements();
|
||||||
if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
|
if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
|
||||||
|
@ -1155,8 +1154,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
// isn't free, because we need to preserve the rest of the wide vector.
|
// isn't free, because we need to preserve the rest of the wide vector.
|
||||||
if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) {
|
if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) {
|
||||||
int NumElts = LT.second.getVectorNumElements();
|
int NumElts = LT.second.getVectorNumElements();
|
||||||
std::pair<InstructionCost, MVT> SubLT =
|
std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
|
||||||
TLI->getTypeLegalizationCost(DL, SubTp);
|
|
||||||
if (SubLT.second.isVector()) {
|
if (SubLT.second.isVector()) {
|
||||||
int NumSubElts = SubLT.second.getVectorNumElements();
|
int NumSubElts = SubLT.second.getVectorNumElements();
|
||||||
if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
|
if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
|
||||||
|
@ -2528,9 +2526,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fall back to legalized types.
|
// Fall back to legalized types.
|
||||||
std::pair<InstructionCost, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
|
std::pair<InstructionCost, MVT> LTSrc = getTypeLegalizationCost(Src);
|
||||||
std::pair<InstructionCost, MVT> LTDest =
|
std::pair<InstructionCost, MVT> LTDest = getTypeLegalizationCost(Dst);
|
||||||
TLI->getTypeLegalizationCost(DL, Dst);
|
|
||||||
|
|
||||||
// If we're truncating to the same legalized type - just assume its free.
|
// If we're truncating to the same legalized type - just assume its free.
|
||||||
if (ISD == ISD::TRUNCATE && LTSrc.second == LTDest.second)
|
if (ISD == ISD::TRUNCATE && LTSrc.second == LTDest.second)
|
||||||
|
@ -2630,7 +2627,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||||
I);
|
I);
|
||||||
|
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
|
|
||||||
MVT MTy = LT.second;
|
MVT MTy = LT.second;
|
||||||
|
|
||||||
|
@ -3395,7 +3392,7 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
|
|
||||||
if (ISD != ISD::DELETED_NODE) {
|
if (ISD != ISD::DELETED_NODE) {
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(OpTy);
|
||||||
MVT MTy = LT.second;
|
MVT MTy = LT.second;
|
||||||
|
|
||||||
// Attempt to lookup cost.
|
// Attempt to lookup cost.
|
||||||
|
@ -3629,8 +3626,7 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
||||||
|
|
||||||
if (ISD != ISD::DELETED_NODE) {
|
if (ISD != ISD::DELETED_NODE) {
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT =
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
|
||||||
TLI->getTypeLegalizationCost(DL, RetTy);
|
|
||||||
MVT MTy = LT.second;
|
MVT MTy = LT.second;
|
||||||
|
|
||||||
// Attempt to lookup cost.
|
// Attempt to lookup cost.
|
||||||
|
@ -3709,7 +3705,7 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
|
||||||
|
|
||||||
// This type is legalized to a scalar type.
|
// This type is legalized to a scalar type.
|
||||||
if (!LT.second.isVector())
|
if (!LT.second.isVector())
|
||||||
|
@ -3797,7 +3793,7 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
|
||||||
cast<FixedVectorType>(Ty)->getNumElements() &&
|
cast<FixedVectorType>(Ty)->getNumElements() &&
|
||||||
"Vector size mismatch");
|
"Vector size mismatch");
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
MVT MScalarTy = LT.second.getScalarType();
|
MVT MScalarTy = LT.second.getScalarType();
|
||||||
unsigned SizeInBits = LT.second.getSizeInBits();
|
unsigned SizeInBits = LT.second.getSizeInBits();
|
||||||
|
|
||||||
|
@ -3987,10 +3983,10 @@ X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
|
||||||
auto *DstVecTy = FixedVectorType::get(EltTy, NumDstElements);
|
auto *DstVecTy = FixedVectorType::get(EltTy, NumDstElements);
|
||||||
|
|
||||||
// Legalize the types.
|
// Legalize the types.
|
||||||
MVT LegalSrcVecTy = TLI->getTypeLegalizationCost(DL, SrcVecTy).second;
|
MVT LegalSrcVecTy = getTypeLegalizationCost(SrcVecTy).second;
|
||||||
MVT LegalPromSrcVecTy = TLI->getTypeLegalizationCost(DL, PromSrcVecTy).second;
|
MVT LegalPromSrcVecTy = getTypeLegalizationCost(PromSrcVecTy).second;
|
||||||
MVT LegalPromDstVecTy = TLI->getTypeLegalizationCost(DL, PromDstVecTy).second;
|
MVT LegalPromDstVecTy = getTypeLegalizationCost(PromDstVecTy).second;
|
||||||
MVT LegalDstVecTy = TLI->getTypeLegalizationCost(DL, DstVecTy).second;
|
MVT LegalDstVecTy = getTypeLegalizationCost(DstVecTy).second;
|
||||||
// They should have legalized into vector types.
|
// They should have legalized into vector types.
|
||||||
if (!LegalSrcVecTy.isVector() || !LegalPromSrcVecTy.isVector() ||
|
if (!LegalSrcVecTy.isVector() || !LegalPromSrcVecTy.isVector() ||
|
||||||
!LegalPromDstVecTy.isVector() || !LegalDstVecTy.isVector())
|
!LegalPromDstVecTy.isVector() || !LegalDstVecTy.isVector())
|
||||||
|
@ -4064,7 +4060,7 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
CostKind);
|
CostKind);
|
||||||
|
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
|
||||||
|
|
||||||
auto *VTy = dyn_cast<FixedVectorType>(Src);
|
auto *VTy = dyn_cast<FixedVectorType>(Src);
|
||||||
|
|
||||||
|
@ -4227,7 +4223,7 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Legalize the type.
|
// Legalize the type.
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcVTy);
|
||||||
auto VT = TLI->getValueType(DL, SrcVTy);
|
auto VT = TLI->getValueType(DL, SrcVTy);
|
||||||
InstructionCost Cost = 0;
|
InstructionCost Cost = 0;
|
||||||
if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
|
if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
|
||||||
|
@ -4343,7 +4339,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
||||||
return Entry->Cost;
|
return Entry->Cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
|
|
||||||
MVT MTy = LT.second;
|
MVT MTy = LT.second;
|
||||||
|
|
||||||
|
@ -4531,7 +4527,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
||||||
|
|
||||||
InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy,
|
InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy,
|
||||||
bool IsUnsigned) {
|
bool IsUnsigned) {
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
|
||||||
|
|
||||||
MVT MTy = LT.second;
|
MVT MTy = LT.second;
|
||||||
|
|
||||||
|
@ -4661,7 +4657,7 @@ InstructionCost
|
||||||
X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
|
X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
|
||||||
bool IsUnsigned,
|
bool IsUnsigned,
|
||||||
TTI::TargetCostKind CostKind) {
|
TTI::TargetCostKind CostKind) {
|
||||||
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
|
||||||
|
|
||||||
MVT MTy = LT.second;
|
MVT MTy = LT.second;
|
||||||
|
|
||||||
|
@ -5088,10 +5084,8 @@ InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy,
|
||||||
|
|
||||||
auto *IndexVTy = FixedVectorType::get(
|
auto *IndexVTy = FixedVectorType::get(
|
||||||
IntegerType::get(SrcVTy->getContext(), IndexSize), VF);
|
IntegerType::get(SrcVTy->getContext(), IndexSize), VF);
|
||||||
std::pair<InstructionCost, MVT> IdxsLT =
|
std::pair<InstructionCost, MVT> IdxsLT = getTypeLegalizationCost(IndexVTy);
|
||||||
TLI->getTypeLegalizationCost(DL, IndexVTy);
|
std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcVTy);
|
||||||
std::pair<InstructionCost, MVT> SrcLT =
|
|
||||||
TLI->getTypeLegalizationCost(DL, SrcVTy);
|
|
||||||
InstructionCost::CostType SplitFactor =
|
InstructionCost::CostType SplitFactor =
|
||||||
*std::max(IdxsLT.first, SrcLT.first).getValue();
|
*std::max(IdxsLT.first, SrcLT.first).getValue();
|
||||||
if (SplitFactor > 1) {
|
if (SplitFactor > 1) {
|
||||||
|
@ -5533,7 +5527,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
|
||||||
|
|
||||||
// Calculate the number of memory operations (NumOfMemOps), required
|
// Calculate the number of memory operations (NumOfMemOps), required
|
||||||
// for load/store the VecTy.
|
// for load/store the VecTy.
|
||||||
MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
|
MVT LegalVT = getTypeLegalizationCost(VecTy).second;
|
||||||
unsigned VecTySize = DL.getTypeStoreSize(VecTy);
|
unsigned VecTySize = DL.getTypeStoreSize(VecTy);
|
||||||
unsigned LegalVTSize = LegalVT.getStoreSize();
|
unsigned LegalVTSize = LegalVT.getStoreSize();
|
||||||
unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
|
unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
|
||||||
|
@ -5613,8 +5607,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
|
||||||
auto *ResultTy = FixedVectorType::get(VecTy->getElementType(),
|
auto *ResultTy = FixedVectorType::get(VecTy->getElementType(),
|
||||||
VecTy->getNumElements() / Factor);
|
VecTy->getNumElements() / Factor);
|
||||||
InstructionCost NumOfResults =
|
InstructionCost NumOfResults =
|
||||||
getTLI()->getTypeLegalizationCost(DL, ResultTy).first *
|
getTypeLegalizationCost(ResultTy).first * NumOfLoadsInInterleaveGrp;
|
||||||
NumOfLoadsInInterleaveGrp;
|
|
||||||
|
|
||||||
// About a half of the loads may be folded in shuffles when we have only
|
// About a half of the loads may be folded in shuffles when we have only
|
||||||
// one result. If we have more than one result, or the loads are masked,
|
// one result. If we have more than one result, or the loads are masked,
|
||||||
|
@ -5711,7 +5704,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
|
||||||
// VecTy for interleave memop is <VF*Factor x Elt>.
|
// VecTy for interleave memop is <VF*Factor x Elt>.
|
||||||
// So, for VF=4, Interleave Factor = 3, Element type = i32 we have
|
// So, for VF=4, Interleave Factor = 3, Element type = i32 we have
|
||||||
// VecTy = <12 x i32>.
|
// VecTy = <12 x i32>.
|
||||||
MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
|
MVT LegalVT = getTypeLegalizationCost(VecTy).second;
|
||||||
|
|
||||||
// This function can be called with VecTy=<6xi128>, Factor=3, in which case
|
// This function can be called with VecTy=<6xi128>, Factor=3, in which case
|
||||||
// the VF=2, while v2i128 is an unsupported MVT vector type
|
// the VF=2, while v2i128 is an unsupported MVT vector type
|
||||||
|
@ -5989,3 +5982,37 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
|
||||||
Alignment, AddressSpace, CostKind,
|
Alignment, AddressSpace, CostKind,
|
||||||
UseMaskForCond, UseMaskForGaps);
|
UseMaskForCond, UseMaskForGaps);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
InstructionCost X86TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
|
||||||
|
int64_t BaseOffset,
|
||||||
|
bool HasBaseReg, int64_t Scale,
|
||||||
|
unsigned AddrSpace) const {
|
||||||
|
// Scaling factors are not free at all.
|
||||||
|
// An indexed folded instruction, i.e., inst (reg1, reg2, scale),
|
||||||
|
// will take 2 allocations in the out of order engine instead of 1
|
||||||
|
// for plain addressing mode, i.e. inst (reg1).
|
||||||
|
// E.g.,
|
||||||
|
// vaddps (%rsi,%rdx), %ymm0, %ymm1
|
||||||
|
// Requires two allocations (one for the load, one for the computation)
|
||||||
|
// whereas:
|
||||||
|
// vaddps (%rsi), %ymm0, %ymm1
|
||||||
|
// Requires just 1 allocation, i.e., freeing allocations for other operations
|
||||||
|
// and having less micro operations to execute.
|
||||||
|
//
|
||||||
|
// For some X86 architectures, this is even worse because for instance for
|
||||||
|
// stores, the complex addressing mode forces the instruction to use the
|
||||||
|
// "load" ports instead of the dedicated "store" port.
|
||||||
|
// E.g., on Haswell:
|
||||||
|
// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
|
||||||
|
// vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
|
||||||
|
TargetLoweringBase::AddrMode AM;
|
||||||
|
AM.BaseGV = BaseGV;
|
||||||
|
AM.BaseOffs = BaseOffset;
|
||||||
|
AM.HasBaseReg = HasBaseReg;
|
||||||
|
AM.Scale = Scale;
|
||||||
|
if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
|
||||||
|
// Scale represents reg2 * scale, thus account for 1
|
||||||
|
// as soon as we use a second register.
|
||||||
|
return AM.Scale != 0;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
|
@ -226,6 +226,15 @@ public:
|
||||||
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
|
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
|
||||||
const APInt &Imm, Type *Ty,
|
const APInt &Imm, Type *Ty,
|
||||||
TTI::TargetCostKind CostKind);
|
TTI::TargetCostKind CostKind);
|
||||||
|
/// Return the cost of the scaling factor used in the addressing
|
||||||
|
/// mode represented by AM for this target, for a load/store
|
||||||
|
/// of the specified type.
|
||||||
|
/// If the AM is supported, the return value must be >= 0.
|
||||||
|
/// If the AM is not supported, it returns a negative value.
|
||||||
|
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
|
||||||
|
int64_t BaseOffset, bool HasBaseReg,
|
||||||
|
int64_t Scale, unsigned AddrSpace) const;
|
||||||
|
|
||||||
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
|
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
|
||||||
const TargetTransformInfo::LSRCost &C2);
|
const TargetTransformInfo::LSRCost &C2);
|
||||||
bool canMacroFuseCmp();
|
bool canMacroFuseCmp();
|
||||||
|
|
Loading…
Reference in New Issue