forked from OSchip/llvm-project
[AArch64][SVE] Add cost model for vector reduce for scalable vectors
This patch computes the cost of the vector.reduce.<operand> intrinsics for scalable vectors. The cost is split into two parts: the legalization cost and the horizontal reduction cost. Differential Revision: https://reviews.llvm.org/D93639
This commit is contained in:
parent
b7e516202e
commit
172f1f8952
|
@ -1288,15 +1288,11 @@ public:
|
||||||
case Intrinsic::vector_reduce_fmin:
|
case Intrinsic::vector_reduce_fmin:
|
||||||
case Intrinsic::vector_reduce_umax:
|
case Intrinsic::vector_reduce_umax:
|
||||||
case Intrinsic::vector_reduce_umin: {
|
case Intrinsic::vector_reduce_umin: {
|
||||||
if (isa<ScalableVectorType>(RetTy))
|
|
||||||
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
|
|
||||||
IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
|
IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
|
||||||
return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
|
return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
|
||||||
}
|
}
|
||||||
case Intrinsic::vector_reduce_fadd:
|
case Intrinsic::vector_reduce_fadd:
|
||||||
case Intrinsic::vector_reduce_fmul: {
|
case Intrinsic::vector_reduce_fmul: {
|
||||||
if (isa<ScalableVectorType>(RetTy))
|
|
||||||
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
|
|
||||||
IntrinsicCostAttributes Attrs(
|
IntrinsicCostAttributes Attrs(
|
||||||
IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I);
|
IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I);
|
||||||
return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
|
return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
|
||||||
|
|
|
@ -1096,11 +1096,70 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Estimate the cost of a min/max reduction over the vector type \p Ty.
///
/// Non-scalable vectors are forwarded unchanged to the BaseT implementation.
/// For scalable vectors the estimate has two parts:
///  - a legalization cost: when the first element of the pair returned by
///    getTypeLegalizationCost exceeds 1, one compare plus one select on the
///    legal type are charged (that value - 1) times;
///  - a fixed cost of 2 for the final horizontal reduction.
int AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                           bool IsPairwise, bool IsUnsigned,
                                           TTI::TargetCostKind CostKind) {
  const bool IsScalable = isa<ScalableVectorType>(Ty);
  if (!IsScalable)
    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
                                         CostKind);

  // Ty is known to be scalable here, so only CondTy is left to verify.
  assert(isa<ScalableVectorType>(CondTy) &&
         "Both vector needs to be scalable");

  // Fixed estimate for the horizontal reduction itself.
  const int HorizontalReductionCost = 2;

  const std::pair<int, MVT> Legalized = TLI->getTypeLegalizationCost(DL, Ty);
  const int LegalizeFactor = Legalized.first;
  if (LegalizeFactor <= 1)
    return HorizontalReductionCost;

  // Charge one compare and one select on the legal type per extra step.
  Type *LegalTy = EVT(Legalized.second).getTypeForEVT(Ty->getContext());
  const unsigned CompareOpcode =
      Ty->isFPOrFPVectorTy() ? Instruction::FCmp : Instruction::ICmp;
  const int CompareCost =
      getCmpSelInstrCost(CompareOpcode, LegalTy, LegalTy,
                         CmpInst::BAD_ICMP_PREDICATE, CostKind);
  const int SelectCost =
      getCmpSelInstrCost(Instruction::Select, LegalTy, LegalTy,
                         CmpInst::BAD_ICMP_PREDICATE, CostKind);

  return (CompareCost + SelectCost) * (LegalizeFactor - 1) +
         HorizontalReductionCost;
}
|
||||||
|
|
||||||
|
int AArch64TTIImpl::getArithmeticReductionCostSVE(
|
||||||
|
unsigned Opcode, VectorType *ValTy, bool IsPairwise,
|
||||||
|
TTI::TargetCostKind CostKind) {
|
||||||
|
assert(!IsPairwise && "Cannot be pair wise to continue");
|
||||||
|
|
||||||
|
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||||
|
int LegalizationCost = 0;
|
||||||
|
if (LT.first > 1) {
|
||||||
|
Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
|
||||||
|
LegalizationCost = getArithmeticInstrCost(Opcode, LegalVTy, CostKind);
|
||||||
|
LegalizationCost *= LT.first - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||||
|
assert(ISD && "Invalid opcode");
|
||||||
|
// Add the final reduction cost for the legal horizontal reduction
|
||||||
|
switch (ISD) {
|
||||||
|
case ISD::ADD:
|
||||||
|
case ISD::AND:
|
||||||
|
case ISD::OR:
|
||||||
|
case ISD::XOR:
|
||||||
|
case ISD::FADD:
|
||||||
|
return LegalizationCost + 2;
|
||||||
|
default:
|
||||||
|
// TODO: Replace for invalid when InstructionCost is used
|
||||||
|
// cases not supported by SVE
|
||||||
|
return 16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
|
int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
|
||||||
VectorType *ValTy,
|
VectorType *ValTy,
|
||||||
bool IsPairwiseForm,
|
bool IsPairwiseForm,
|
||||||
TTI::TargetCostKind CostKind) {
|
TTI::TargetCostKind CostKind) {
|
||||||
|
|
||||||
|
if (isa<ScalableVectorType>(ValTy))
|
||||||
|
return getArithmeticReductionCostSVE(Opcode, ValTy, IsPairwiseForm,
|
||||||
|
CostKind);
|
||||||
if (IsPairwiseForm)
|
if (IsPairwiseForm)
|
||||||
return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
|
return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
|
||||||
CostKind);
|
CostKind);
|
||||||
|
|
|
@ -139,6 +139,14 @@ public:
|
||||||
|
|
||||||
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
||||||
|
|
||||||
|
int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
|
||||||
|
bool IsPairwise, bool IsUnsigned,
|
||||||
|
TTI::TargetCostKind CostKind);
|
||||||
|
|
||||||
|
int getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy,
|
||||||
|
bool IsPairwiseForm,
|
||||||
|
TTI::TargetCostKind CostKind);
|
||||||
|
|
||||||
int getArithmeticInstrCost(
|
int getArithmeticInstrCost(
|
||||||
unsigned Opcode, Type *Ty,
|
unsigned Opcode, Type *Ty,
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
|
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
|
||||||
|
|
|
@ -0,0 +1,251 @@
|
||||||
|
; Check getIntrinsicInstrCost in BasicTTIImpl.h with SVE for vector.reduce.<operand>
|
||||||
|
; Checks both legal vector sizes and illegal ones (which require legalization).
|
||||||
|
|
||||||
|
; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||||
|
|
||||||
|
|
||||||
|
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||||
|
; WARN-NOT: warning
|
||||||
|
|
||||||
|
define i32 @add.i32.nxv4i32(<vscale x 4 x i32> %v) {
|
||||||
|
; CHECK-LABEL: 'add.i32.nxv4i32'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||||
|
|
||||||
|
%r = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
ret i32 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @add.i64.nxv4i64(<vscale x 4 x i64> %v) {
|
||||||
|
; CHECK-LABEL: 'add.i64.nxv4i64'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||||
|
|
||||||
|
%r = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
ret i64 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @mul.i32.nxv4i32(<vscale x 4 x i32> %v) {
|
||||||
|
; CHECK-LABEL: 'mul.i32.nxv4i32'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||||
|
|
||||||
|
%r = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
ret i32 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @mul.i64.nxv4i64(<vscale x 4 x i64> %v) {
|
||||||
|
; CHECK-LABEL: 'mul.i64.nxv4i64'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||||
|
|
||||||
|
%r = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
ret i64 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @and.i32.nxv4i32(<vscale x 4 x i32> %v) {
|
||||||
|
; CHECK-LABEL: 'and.i32.nxv4i32'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||||
|
|
||||||
|
%r = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
ret i32 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @and.i64.nxv4i64(<vscale x 4 x i64> %v) {
|
||||||
|
; CHECK-LABEL: 'and.i64.nxv4i64'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||||
|
|
||||||
|
%r = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
ret i64 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @or.i32.nxv4i32(<vscale x 4 x i32> %v) {
|
||||||
|
; CHECK-LABEL: 'or.i32.nxv4i32'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||||
|
|
||||||
|
%r = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
ret i32 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @or.i64.nxv4i64(<vscale x 4 x i64> %v) {
|
||||||
|
; CHECK-LABEL: 'or.i64.nxv4i64'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||||
|
|
||||||
|
%r = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
ret i64 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @xor.i32.nxv4i32(<vscale x 4 x i32> %v) {
|
||||||
|
; CHECK-LABEL: 'xor.i32.nxv4i32'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||||
|
|
||||||
|
%r = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
ret i32 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @xor.i64.nxv4i64(<vscale x 4 x i64> %v) {
|
||||||
|
; CHECK-LABEL: 'xor.i64.nxv4i64'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||||
|
|
||||||
|
%r = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
ret i64 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @umin.i32.nxv4i32(<vscale x 4 x i32> %v) {
|
||||||
|
; CHECK-LABEL: 'umin.i32.nxv4i32'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||||
|
|
||||||
|
%r = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
ret i32 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @umin.i64.nxv4i64(<vscale x 4 x i64> %v) {
|
||||||
|
; CHECK-LABEL: 'umin.i64.nxv4i64'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||||
|
|
||||||
|
%r = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
ret i64 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @fmax.f32.nxv4f32(<vscale x 4 x float> %v) {
|
||||||
|
; CHECK-LABEL: 'fmax.f32.nxv4f32'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
|
||||||
|
|
||||||
|
%r = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
|
||||||
|
ret float %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define double @fmax.f64.nxv4f64(<vscale x 4 x double> %v) {
|
||||||
|
; CHECK-LABEL: 'fmax.f64.nxv4f64'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
|
||||||
|
|
||||||
|
%r = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
|
||||||
|
ret double %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @fmin.f32.nxv4f32(<vscale x 4 x float> %v) {
|
||||||
|
; CHECK-LABEL: 'fmin.f32.nxv4f32'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
|
||||||
|
|
||||||
|
%r = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
|
||||||
|
ret float %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define double @fmin.f64.nxv4f64(<vscale x 4 x double> %v) {
|
||||||
|
; CHECK-LABEL: 'fmin.f64.nxv4f64'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
|
||||||
|
|
||||||
|
%r = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
|
||||||
|
ret double %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @umax.i32.nxv4i32(<vscale x 4 x i32> %v) {
|
||||||
|
; CHECK-LABEL: 'umax.i32.nxv4i32'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||||
|
|
||||||
|
%r = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
ret i32 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @umax.i64.nxv4i64(<vscale x 4 x i64> %v) {
|
||||||
|
; CHECK-LABEL: 'umax.i64.nxv4i64'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||||
|
|
||||||
|
%r = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
ret i64 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @smin.i32.nxv4i32(<vscale x 4 x i32> %v) {
|
||||||
|
; CHECK-LABEL: 'smin.i32.nxv4i32'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||||
|
|
||||||
|
%r = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
ret i32 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @smin.i64.nxv4i64(<vscale x 4 x i64> %v) {
|
||||||
|
; CHECK-LABEL: 'smin.i64.nxv4i64'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||||
|
|
||||||
|
%r = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
ret i64 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @smax.i32.nxv4i32(<vscale x 4 x i32> %v) {
|
||||||
|
; CHECK-LABEL: 'smax.i32.nxv4i32'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||||
|
|
||||||
|
%r = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
|
||||||
|
ret i32 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @smax.i64.nxv4i64(<vscale x 4 x i64> %v) {
|
||||||
|
; CHECK-LABEL: 'smax.i64.nxv4i64'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||||
|
|
||||||
|
%r = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
|
||||||
|
ret i64 %r
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @fadda_nxv4f32(float %start, <vscale x 4 x float> %a) #0 {
|
||||||
|
; CHECK-LABEL: 'fadda_nxv4f32'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call float @llvm.vector.reduce.fadd.nxv4f32(float %start, <vscale x 4 x float> %a)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %res
|
||||||
|
|
||||||
|
%res = call float @llvm.vector.reduce.fadd.nxv4f32(float %start, <vscale x 4 x float> %a)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
define double @fadda_nxv4f64(double %start, <vscale x 4 x double> %a) #0 {
|
||||||
|
; CHECK-LABEL: 'fadda_nxv4f64'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call double @llvm.vector.reduce.fadd.nxv4f64(double %start, <vscale x 4 x double> %a)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res
|
||||||
|
|
||||||
|
%res = call double @llvm.vector.reduce.fadd.nxv4f64(double %start, <vscale x 4 x double> %a)
|
||||||
|
ret double %res
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
|
||||||
|
declare i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32>)
|
||||||
|
declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
|
||||||
|
declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
|
||||||
|
declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
|
||||||
|
declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
|
||||||
|
declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
|
||||||
|
declare i32 @llvm.vector.reduce.fmin.nxv4i32(<vscale x 4 x i32>)
|
||||||
|
declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
|
||||||
|
declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
|
||||||
|
declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
|
||||||
|
declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
|
||||||
|
declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
|
||||||
|
declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
|
||||||
|
declare i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64>)
|
||||||
|
declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
|
||||||
|
declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
|
||||||
|
declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
|
||||||
|
declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
|
||||||
|
declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
|
||||||
|
declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
|
||||||
|
declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
|
||||||
|
declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
|
||||||
|
declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
|
||||||
|
declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
|
Loading…
Reference in New Issue