forked from OSchip/llvm-project
[AArch64-SVE]: Improve cost model for div/udiv/mul 128-bit vector operations
Differential Revision: https://reviews.llvm.org/D132477
This commit is contained in:
parent
db664a666c
commit
f2072e0ae0
|
@ -2084,12 +2084,40 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
|
||||||
InstructionCost Cost = BaseT::getArithmeticInstrCost(
|
InstructionCost Cost = BaseT::getArithmeticInstrCost(
|
||||||
Opcode, Ty, CostKind, Op1Info, Op2Info);
|
Opcode, Ty, CostKind, Op1Info, Op2Info);
|
||||||
if (Ty->isVectorTy()) {
|
if (Ty->isVectorTy()) {
|
||||||
// On AArch64, vector divisions are not supported natively and are
|
if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {
|
||||||
// expanded into scalar divisions of each pair of elements.
|
// SDIV/UDIV is legal or custom-lowered with SVE, so the cost is lower.
|
||||||
Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind,
|
if (isa<FixedVectorType>(Ty) &&
|
||||||
Op1Info, Op2Info);
|
cast<FixedVectorType>(Ty)->getPrimitiveSizeInBits().getFixedSize() <
|
||||||
Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind,
|
128) {
|
||||||
Op1Info, Op2Info);
|
EVT VT = TLI->getValueType(DL, Ty);
|
||||||
|
static const CostTblEntry DivTbl[]{
|
||||||
|
{ISD::SDIV, MVT::v2i8, 5}, {ISD::SDIV, MVT::v4i8, 8},
|
||||||
|
{ISD::SDIV, MVT::v8i8, 8}, {ISD::SDIV, MVT::v2i16, 5},
|
||||||
|
{ISD::SDIV, MVT::v4i16, 5}, {ISD::SDIV, MVT::v2i32, 1},
|
||||||
|
{ISD::UDIV, MVT::v2i8, 5}, {ISD::UDIV, MVT::v4i8, 8},
|
||||||
|
{ISD::UDIV, MVT::v8i8, 8}, {ISD::UDIV, MVT::v2i16, 5},
|
||||||
|
{ISD::UDIV, MVT::v4i16, 5}, {ISD::UDIV, MVT::v2i32, 1}};
|
||||||
|
|
||||||
|
const auto *Entry = CostTableLookup(DivTbl, ISD, VT.getSimpleVT());
|
||||||
|
if (nullptr != Entry)
|
||||||
|
return Entry->Cost;
|
||||||
|
}
|
||||||
|
// For 8/16-bit elements, the cost is higher because the type
|
||||||
|
// requires promotion and possibly splitting:
|
||||||
|
if (LT.second.getScalarType() == MVT::i8)
|
||||||
|
Cost *= 8;
|
||||||
|
else if (LT.second.getScalarType() == MVT::i16)
|
||||||
|
Cost *= 4;
|
||||||
|
return Cost;
|
||||||
|
} else {
|
||||||
|
// On AArch64, without SVE, vector divisions are expanded
|
||||||
|
// into scalar divisions of each pair of elements.
|
||||||
|
Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty,
|
||||||
|
CostKind, Op1Info, Op2Info);
|
||||||
|
Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind,
|
||||||
|
Op1Info, Op2Info);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: if one of the arguments is scalar, then it's not necessary to
|
// TODO: if one of the arguments is scalar, then it's not necessary to
|
||||||
// double the cost of handling the vector elements.
|
// double the cost of handling the vector elements.
|
||||||
Cost += Cost;
|
Cost += Cost;
|
||||||
|
@ -2097,16 +2125,23 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
case ISD::MUL:
|
case ISD::MUL:
|
||||||
// Since we do not have a MUL.2d instruction, a mul <2 x i64> is expensive
|
// When SVE is available, then we can lower the v2i64 operation using
|
||||||
// as elements are extracted from the vectors and the muls scalarized.
|
// the SVE mul instruction, which has a lower cost.
|
||||||
// As getScalarizationOverhead is a bit too pessimistic, we estimate the
|
if (LT.second == MVT::v2i64 && ST->hasSVE())
|
||||||
// cost for a i64 vector directly here, which is:
|
return LT.first;
|
||||||
|
|
||||||
|
// When SVE is not available, there is no MUL.2d instruction,
|
||||||
|
// which means mul <2 x i64> is expensive as elements are extracted
|
||||||
|
// from the vectors and the muls scalarized.
|
||||||
|
// As getScalarizationOverhead is a bit too pessimistic, we
|
||||||
|
// estimate the cost for a i64 vector directly here, which is:
|
||||||
// - four 2-cost i64 extracts,
|
// - four 2-cost i64 extracts,
|
||||||
// - two 2-cost i64 inserts, and
|
// - two 2-cost i64 inserts, and
|
||||||
// - two 1-cost muls.
|
// - two 1-cost muls.
|
||||||
// So, for a v2i64 with LT.First = 1 the cost is 14, and for a v4i64 with
|
// So, for a v2i64 with LT.First = 1 the cost is 14, and for a v4i64 with
|
||||||
// LT.first = 2 the cost is 28. If both operands are extensions it will not
|
// LT.first = 2 the cost is 28. If both operands are extensions it will not
|
||||||
// need to scalarize so the cost can be cheaper (smull or umull).
|
// need to scalarize so the cost can be cheaper (smull or umull).
|
||||||
|
// so the cost can be cheaper (smull or umull).
|
||||||
if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
|
if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
|
||||||
return LT.first;
|
return LT.first;
|
||||||
return LT.first * 14;
|
return LT.first * 14;
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
|
||||||
|
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=128 | FileCheck %s -D#VBITS=128
|
||||||
|
|
||||||
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
define void @scalable_sdiv() #0 {
|
||||||
|
; CHECK-LABEL: 'scalable_sdiv'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sdiv_nxv16i8 = sdiv <vscale x 16 x i8> undef, undef
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sdiv_nxv8i16 = sdiv <vscale x 8 x i16> undef, undef
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sdiv_nxv4i32 = sdiv <vscale x 4 x i32> undef, undef
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sdiv_nxv2i64 = sdiv <vscale x 2 x i64> undef, undef
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
%sdiv_nxv16i8 = sdiv <vscale x 16 x i8> undef, undef
|
||||||
|
%sdiv_nxv8i16 = sdiv <vscale x 8 x i16> undef, undef
|
||||||
|
%sdiv_nxv4i32 = sdiv <vscale x 4 x i32> undef, undef
|
||||||
|
%sdiv_nxv2i64 = sdiv <vscale x 2 x i64> undef, undef
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @scalable_udiv() #0 {
|
||||||
|
; CHECK-LABEL: 'scalable_udiv'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %udiv_nxv16i8 = udiv <vscale x 16 x i8> undef, undef
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %udiv_nxv8i16 = udiv <vscale x 8 x i16> undef, undef
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %udiv_nxv4i32 = udiv <vscale x 4 x i32> undef, undef
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %udiv_nxv2i64 = udiv <vscale x 2 x i64> undef, undef
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
%udiv_nxv16i8 = udiv <vscale x 16 x i8> undef, undef
|
||||||
|
%udiv_nxv8i16 = udiv <vscale x 8 x i16> undef, undef
|
||||||
|
%udiv_nxv4i32 = udiv <vscale x 4 x i32> undef, undef
|
||||||
|
%udiv_nxv2i64 = udiv <vscale x 2 x i64> undef, undef
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @scalable_mul() #0 {
|
||||||
|
; CHECK-LABEL: 'scalable_mul'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv16i8 = mul <vscale x 16 x i8> undef, undef
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv8i16 = mul <vscale x 8 x i16> undef, undef
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv4i32 = mul <vscale x 4 x i32> undef, undef
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv2i64 = mul <vscale x 2 x i64> undef, undef
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
%mul_nxv16i8 = mul <vscale x 16 x i8> undef, undef
|
||||||
|
%mul_nxv8i16 = mul <vscale x 8 x i16> undef, undef
|
||||||
|
%mul_nxv4i32 = mul <vscale x 4 x i32> undef, undef
|
||||||
|
%mul_nxv2i64 = mul <vscale x 2 x i64> undef, undef
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { "target-features"="+sve" }
|
|
@ -58,4 +58,115 @@ define void @add() #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Assuming base_cost = 2
|
||||||
|
; Assuming legalization_cost = ((vec_len-1)/VBITS)+1
|
||||||
|
; Assuming a cost multiplier of 8 for i8.
|
||||||
|
; Assuming a cost multiplier of 4 for i16.
|
||||||
|
; The hard-coded expected cost is based on VBITS=128
|
||||||
|
define void @sdiv() #0 {
|
||||||
|
; CHECK-LABEL: function 'sdiv'
|
||||||
|
|
||||||
|
; CHECK: cost of 5 for instruction: %sdiv16.i8 = sdiv <2 x i8> undef, undef
|
||||||
|
%sdiv16.i8 = sdiv <2 x i8> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of 8 for instruction: %sdiv32.i8 = sdiv <4 x i8> undef, undef
|
||||||
|
%sdiv32.i8 = sdiv <4 x i8> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of 5 for instruction: %sdiv32.i16 = sdiv <2 x i16> undef, undef
|
||||||
|
%sdiv32.i16 = sdiv <2 x i16> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of 8 for instruction: %sdiv64.i8 = sdiv <8 x i8> undef, undef
|
||||||
|
%sdiv64.i8 = sdiv <8 x i8> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of 5 for instruction: %sdiv64.i16 = sdiv <4 x i16> undef, undef
|
||||||
|
%sdiv64.i16 = sdiv <4 x i16> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of 1 for instruction: %sdiv64.i32 = sdiv <2 x i32> undef, undef
|
||||||
|
%sdiv64.i32 = sdiv <2 x i32> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 8)]] for instruction: %sdiv128.i8 = sdiv <16 x i8> undef, undef
|
||||||
|
%sdiv128.i8 = sdiv <16 x i8> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 4)]] for instruction: %sdiv128.i16 = sdiv <8 x i16> undef, undef
|
||||||
|
%sdiv128.i16 = sdiv <8 x i16> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(div(128-1, VBITS)+1, 2)]] for instruction: %sdiv128.i64 = sdiv <2 x i64> undef, undef
|
||||||
|
%sdiv128.i64 = sdiv <2 x i64> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 8)]] for instruction: %sdiv512.i8 = sdiv <64 x i8> undef, undef
|
||||||
|
%sdiv512.i8 = sdiv <64 x i8> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 4)]] for instruction: %sdiv512.i16 = sdiv <32 x i16> undef, undef
|
||||||
|
%sdiv512.i16 = sdiv <32 x i16> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction: %sdiv512.i32 = sdiv <16 x i32> undef, undef
|
||||||
|
%sdiv512.i32 = sdiv <16 x i32> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction: %sdiv512.i64 = sdiv <8 x i64> undef, undef
|
||||||
|
%sdiv512.i64 = sdiv <8 x i64> undef, undef
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Assuming base_cost = 2
|
||||||
|
; Assuming legalization_cost = ((vec_len-1)/VBITS)+1
|
||||||
|
; Assuming a cost multiplier of 8 for i8.
|
||||||
|
; Assuming a cost multiplier of 4 for i16.
|
||||||
|
; The hard-coded expected cost is based on VBITS=128
|
||||||
|
define void @udiv() #0 {
|
||||||
|
; CHECK-LABEL: function 'udiv'
|
||||||
|
|
||||||
|
; CHECK: cost of 5 for instruction: %udiv16.i8 = udiv <2 x i8> undef, undef
|
||||||
|
%udiv16.i8 = udiv <2 x i8> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of 8 for instruction: %udiv32.i8 = udiv <4 x i8> undef, undef
|
||||||
|
%udiv32.i8 = udiv <4 x i8> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of 5 for instruction: %udiv32.i16 = udiv <2 x i16> undef, undef
|
||||||
|
%udiv32.i16 = udiv <2 x i16> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of 8 for instruction: %udiv64.i8 = udiv <8 x i8> undef, undef
|
||||||
|
%udiv64.i8 = udiv <8 x i8> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of 5 for instruction: %udiv64.i16 = udiv <4 x i16> undef, undef
|
||||||
|
%udiv64.i16 = udiv <4 x i16> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of 1 for instruction: %udiv64.i32 = udiv <2 x i32> undef, undef
|
||||||
|
%udiv64.i32 = udiv <2 x i32> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 8)]] for instruction: %udiv128.i8 = udiv <16 x i8> undef, undef
|
||||||
|
%udiv128.i8 = udiv <16 x i8> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 4)]] for instruction: %udiv128.i16 = udiv <8 x i16> undef, undef
|
||||||
|
%udiv128.i16 = udiv <8 x i16> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(div(128-1, VBITS)+1, 2)]] for instruction: %udiv128.i64 = udiv <2 x i64> undef, undef
|
||||||
|
%udiv128.i64 = udiv <2 x i64> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 8)]] for instruction: %udiv512.i8 = udiv <64 x i8> undef, undef
|
||||||
|
%udiv512.i8 = udiv <64 x i8> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 4)]] for instruction: %udiv512.i16 = udiv <32 x i16> undef, undef
|
||||||
|
%udiv512.i16 = udiv <32 x i16> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction: %udiv512.i32 = udiv <16 x i32> undef, undef
|
||||||
|
%udiv512.i32 = udiv <16 x i32> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction: %udiv512.i64 = udiv <8 x i64> undef, undef
|
||||||
|
%udiv512.i64 = udiv <8 x i64> undef, undef
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; The hard-coded expected cost is based on VBITS=128
|
||||||
|
define void @mul() #0 {
|
||||||
|
; CHECK: cost of [[#div(128-1, VBITS)+1]] for instruction: %mul128.i64 = mul <2 x i64> undef, undef
|
||||||
|
%mul128.i64 = mul <2 x i64> undef, undef
|
||||||
|
|
||||||
|
; CHECK: cost of [[#div(512-1, VBITS)+1]] for instruction: %mul512.i64 = mul <8 x i64> undef, undef
|
||||||
|
%mul512.i64 = mul <8 x i64> undef, undef
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
attributes #0 = { "target-features"="+sve" }
|
attributes #0 = { "target-features"="+sve" }
|
||||||
|
|
|
@ -5,30 +5,30 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||||
|
|
||||||
define void @test_urem_srem_expand() {
|
define void @test_urem_srem_expand() {
|
||||||
; CHECK-LABEL: 'test_urem_srem_expand'
|
; CHECK-LABEL: 'test_urem_srem_expand'
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %legal_type_urem_0 = urem <vscale x 16 x i8> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %legal_type_urem_0 = urem <vscale x 16 x i8> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %legal_type_urem_1 = urem <vscale x 8 x i16> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %legal_type_urem_1 = urem <vscale x 8 x i16> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %legal_type_urem_2 = urem <vscale x 4 x i32> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %legal_type_urem_2 = urem <vscale x 4 x i32> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %legal_type_urem_3 = urem <vscale x 2 x i64> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %legal_type_urem_3 = urem <vscale x 2 x i64> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %legal_type_srem_0 = srem <vscale x 16 x i8> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %legal_type_srem_0 = srem <vscale x 16 x i8> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %legal_type_srem_1 = srem <vscale x 8 x i16> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %legal_type_srem_1 = srem <vscale x 8 x i16> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %legal_type_srem_2 = srem <vscale x 4 x i32> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %legal_type_srem_2 = srem <vscale x 4 x i32> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %legal_type_srem_3 = srem <vscale x 2 x i64> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %legal_type_srem_3 = srem <vscale x 2 x i64> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %split_type_urem_0 = urem <vscale x 32 x i8> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %split_type_urem_0 = urem <vscale x 32 x i8> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %split_type_urem_1 = urem <vscale x 16 x i16> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %split_type_urem_1 = urem <vscale x 16 x i16> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %split_type_urem_2 = urem <vscale x 8 x i32> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %split_type_urem_2 = urem <vscale x 8 x i32> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %split_type_urem_3 = urem <vscale x 4 x i64> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %split_type_urem_3 = urem <vscale x 4 x i64> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %split_type_srem_0 = srem <vscale x 32 x i8> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %split_type_srem_0 = srem <vscale x 32 x i8> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %split_type_srem_1 = srem <vscale x 16 x i16> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %split_type_srem_1 = srem <vscale x 16 x i16> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %split_type_srem_2 = srem <vscale x 8 x i32> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %split_type_srem_2 = srem <vscale x 8 x i32> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %split_type_srem_3 = srem <vscale x 4 x i64> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %split_type_srem_3 = srem <vscale x 4 x i64> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %widen_type_urem_0 = urem <vscale x 31 x i8> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %widen_type_urem_0 = urem <vscale x 31 x i8> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %widen_type_urem_1 = urem <vscale x 15 x i16> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %widen_type_urem_1 = urem <vscale x 15 x i16> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %widen_type_urem_2 = urem <vscale x 7 x i32> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %widen_type_urem_2 = urem <vscale x 7 x i32> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %widen_type_urem_3 = urem <vscale x 3 x i64> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %widen_type_urem_3 = urem <vscale x 3 x i64> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %widen_type_srem_0 = srem <vscale x 31 x i8> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %widen_type_srem_0 = srem <vscale x 31 x i8> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %widen_type_srem_1 = srem <vscale x 15 x i16> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %widen_type_srem_1 = srem <vscale x 15 x i16> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %widen_type_srem_2 = srem <vscale x 7 x i32> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %widen_type_srem_2 = srem <vscale x 7 x i32> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %widen_type_srem_3 = srem <vscale x 3 x i64> undef, undef
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %widen_type_srem_3 = srem <vscale x 3 x i64> undef, undef
|
||||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
|
|
Loading…
Reference in New Issue