forked from OSchip/llvm-project
[X86] Improve min/max reduction costs.
This is similar to what I recently did for getArithmeticReductionCost. I'm trying to account for the narrowing from 512->256->128 as we go. I've also added a new helper method getMinMaxCost that tries to handle the cases where we have native min/max instructions and fall back to cmp+select when we don't. Differential Revision: https://reviews.llvm.org/D76634
This commit is contained in:
parent
5fe2809447
commit
5625e6ab37
|
@ -2913,8 +2913,134 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
|
|||
return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
|
||||
}
|
||||
|
||||
int X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned) {
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
|
||||
|
||||
MVT MTy = LT.second;
|
||||
|
||||
int ISD;
|
||||
if (Ty->isIntOrIntVectorTy()) {
|
||||
ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN;
|
||||
} else {
|
||||
assert(Ty->isFPOrFPVectorTy() &&
|
||||
"Expected float point or integer vector type.");
|
||||
ISD = ISD::FMINNUM;
|
||||
}
|
||||
|
||||
static const CostTblEntry SSE1CostTbl[] = {
|
||||
{ISD::FMINNUM, MVT::v4f32, 1},
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE2CostTbl[] = {
|
||||
{ISD::FMINNUM, MVT::v2f64, 1},
|
||||
{ISD::SMIN, MVT::v8i16, 1},
|
||||
{ISD::UMIN, MVT::v16i8, 1},
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE41CostTbl[] = {
|
||||
{ISD::SMIN, MVT::v4i32, 1},
|
||||
{ISD::UMIN, MVT::v4i32, 1},
|
||||
{ISD::UMIN, MVT::v8i16, 1},
|
||||
{ISD::SMIN, MVT::v16i8, 1},
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE42CostTbl[] = {
|
||||
{ISD::UMIN, MVT::v2i64, 3}, // xor+pcmpgtq+blendvpd
|
||||
};
|
||||
|
||||
static const CostTblEntry AVX1CostTbl[] = {
|
||||
{ISD::FMINNUM, MVT::v8f32, 1},
|
||||
{ISD::FMINNUM, MVT::v4f64, 1},
|
||||
{ISD::SMIN, MVT::v8i32, 3},
|
||||
{ISD::UMIN, MVT::v8i32, 3},
|
||||
{ISD::SMIN, MVT::v16i16, 3},
|
||||
{ISD::UMIN, MVT::v16i16, 3},
|
||||
{ISD::SMIN, MVT::v32i8, 3},
|
||||
{ISD::UMIN, MVT::v32i8, 3},
|
||||
};
|
||||
|
||||
static const CostTblEntry AVX2CostTbl[] = {
|
||||
{ISD::SMIN, MVT::v8i32, 1},
|
||||
{ISD::UMIN, MVT::v8i32, 1},
|
||||
{ISD::SMIN, MVT::v16i16, 1},
|
||||
{ISD::UMIN, MVT::v16i16, 1},
|
||||
{ISD::SMIN, MVT::v32i8, 1},
|
||||
{ISD::UMIN, MVT::v32i8, 1},
|
||||
};
|
||||
|
||||
static const CostTblEntry AVX512CostTbl[] = {
|
||||
{ISD::FMINNUM, MVT::v16f32, 1},
|
||||
{ISD::FMINNUM, MVT::v8f64, 1},
|
||||
{ISD::SMIN, MVT::v2i64, 1},
|
||||
{ISD::UMIN, MVT::v2i64, 1},
|
||||
{ISD::SMIN, MVT::v4i64, 1},
|
||||
{ISD::UMIN, MVT::v4i64, 1},
|
||||
{ISD::SMIN, MVT::v8i64, 1},
|
||||
{ISD::UMIN, MVT::v8i64, 1},
|
||||
{ISD::SMIN, MVT::v16i32, 1},
|
||||
{ISD::UMIN, MVT::v16i32, 1},
|
||||
};
|
||||
|
||||
static const CostTblEntry AVX512BWCostTbl[] = {
|
||||
{ISD::SMIN, MVT::v32i16, 1},
|
||||
{ISD::UMIN, MVT::v32i16, 1},
|
||||
{ISD::SMIN, MVT::v64i8, 1},
|
||||
{ISD::UMIN, MVT::v64i8, 1},
|
||||
};
|
||||
|
||||
// If we have a native MIN/MAX instruction for this type, use it.
|
||||
if (ST->hasBWI())
|
||||
if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasAVX512())
|
||||
if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasAVX2())
|
||||
if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasAVX())
|
||||
if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasSSE42())
|
||||
if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasSSE41())
|
||||
if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasSSE2())
|
||||
if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasSSE1())
|
||||
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
unsigned CmpOpcode;
|
||||
if (Ty->isFPOrFPVectorTy()) {
|
||||
CmpOpcode = Instruction::FCmp;
|
||||
} else {
|
||||
assert(Ty->isIntOrIntVectorTy() &&
|
||||
"expecting floating point or integer type for min/max reduction");
|
||||
CmpOpcode = Instruction::ICmp;
|
||||
}
|
||||
|
||||
// Otherwise fall back to cmp+select.
|
||||
return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
|
||||
getCmpSelInstrCost(Instruction::Select, Ty, CondTy, nullptr);
|
||||
}
|
||||
|
||||
int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
|
||||
bool IsPairwise, bool IsUnsigned) {
|
||||
// Just use the default implementation for pair reductions.
|
||||
if (IsPairwise)
|
||||
return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned);
|
||||
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
|
||||
MVT MTy = LT.second;
|
||||
|
@ -2931,216 +3057,154 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
|
|||
// We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
|
||||
// and make it as the cost.
|
||||
|
||||
static const CostTblEntry SSE1CostTblPairWise[] = {
|
||||
{ISD::FMINNUM, MVT::v4f32, 4},
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE2CostTblPairWise[] = {
|
||||
{ISD::FMINNUM, MVT::v2f64, 3},
|
||||
{ISD::SMIN, MVT::v2i64, 6},
|
||||
{ISD::UMIN, MVT::v2i64, 8},
|
||||
{ISD::SMIN, MVT::v4i32, 6},
|
||||
{ISD::UMIN, MVT::v4i32, 8},
|
||||
{ISD::SMIN, MVT::v8i16, 4},
|
||||
{ISD::UMIN, MVT::v8i16, 6},
|
||||
{ISD::SMIN, MVT::v16i8, 8},
|
||||
{ISD::UMIN, MVT::v16i8, 6},
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE41CostTblPairWise[] = {
|
||||
{ISD::FMINNUM, MVT::v4f32, 2},
|
||||
{ISD::SMIN, MVT::v2i64, 9},
|
||||
{ISD::UMIN, MVT::v2i64,10},
|
||||
{ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5"
|
||||
{ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8"
|
||||
{ISD::SMIN, MVT::v8i16, 2},
|
||||
{ISD::UMIN, MVT::v8i16, 2},
|
||||
{ISD::SMIN, MVT::v16i8, 3},
|
||||
{ISD::UMIN, MVT::v16i8, 3},
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE42CostTblPairWise[] = {
|
||||
{ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
|
||||
{ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6"
|
||||
};
|
||||
|
||||
static const CostTblEntry AVX1CostTblPairWise[] = {
|
||||
{ISD::FMINNUM, MVT::v4f32, 1},
|
||||
{ISD::FMINNUM, MVT::v4f64, 1},
|
||||
{ISD::FMINNUM, MVT::v8f32, 2},
|
||||
{ISD::SMIN, MVT::v2i64, 3},
|
||||
{ISD::UMIN, MVT::v2i64, 3},
|
||||
{ISD::SMIN, MVT::v4i32, 1},
|
||||
{ISD::UMIN, MVT::v4i32, 1},
|
||||
{ISD::SMIN, MVT::v8i16, 1},
|
||||
{ISD::UMIN, MVT::v8i16, 1},
|
||||
{ISD::SMIN, MVT::v16i8, 2},
|
||||
{ISD::UMIN, MVT::v16i8, 2},
|
||||
{ISD::SMIN, MVT::v4i64, 7},
|
||||
{ISD::UMIN, MVT::v4i64, 7},
|
||||
{ISD::SMIN, MVT::v8i32, 3},
|
||||
{ISD::UMIN, MVT::v8i32, 3},
|
||||
{ISD::SMIN, MVT::v16i16, 3},
|
||||
{ISD::UMIN, MVT::v16i16, 3},
|
||||
{ISD::SMIN, MVT::v32i8, 3},
|
||||
{ISD::UMIN, MVT::v32i8, 3},
|
||||
};
|
||||
|
||||
static const CostTblEntry AVX2CostTblPairWise[] = {
|
||||
{ISD::SMIN, MVT::v4i64, 2},
|
||||
{ISD::UMIN, MVT::v4i64, 2},
|
||||
{ISD::SMIN, MVT::v8i32, 1},
|
||||
{ISD::UMIN, MVT::v8i32, 1},
|
||||
{ISD::SMIN, MVT::v16i16, 1},
|
||||
{ISD::UMIN, MVT::v16i16, 1},
|
||||
{ISD::SMIN, MVT::v32i8, 2},
|
||||
{ISD::UMIN, MVT::v32i8, 2},
|
||||
};
|
||||
|
||||
static const CostTblEntry AVX512CostTblPairWise[] = {
|
||||
{ISD::FMINNUM, MVT::v8f64, 1},
|
||||
{ISD::FMINNUM, MVT::v16f32, 2},
|
||||
{ISD::SMIN, MVT::v8i64, 2},
|
||||
{ISD::UMIN, MVT::v8i64, 2},
|
||||
{ISD::SMIN, MVT::v16i32, 1},
|
||||
{ISD::UMIN, MVT::v16i32, 1},
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE1CostTblNoPairWise[] = {
|
||||
{ISD::FMINNUM, MVT::v4f32, 4},
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE2CostTblNoPairWise[] = {
|
||||
{ISD::FMINNUM, MVT::v2f64, 3},
|
||||
{ISD::SMIN, MVT::v2i64, 6},
|
||||
{ISD::UMIN, MVT::v2i64, 8},
|
||||
{ISD::SMIN, MVT::v4i32, 6},
|
||||
{ISD::UMIN, MVT::v4i32, 8},
|
||||
{ISD::SMIN, MVT::v8i16, 4},
|
||||
{ISD::UMIN, MVT::v8i16, 6},
|
||||
{ISD::SMIN, MVT::v16i8, 8},
|
||||
{ISD::UMIN, MVT::v16i8, 6},
|
||||
{ISD::UMIN, MVT::v2i16, 5}, // need pxors to use pminsw/pmaxsw
|
||||
{ISD::UMIN, MVT::v4i16, 7}, // need pxors to use pminsw/pmaxsw
|
||||
{ISD::UMIN, MVT::v8i16, 9}, // need pxors to use pminsw/pmaxsw
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE41CostTblNoPairWise[] = {
|
||||
{ISD::FMINNUM, MVT::v4f32, 3},
|
||||
{ISD::SMIN, MVT::v2i64, 9},
|
||||
{ISD::UMIN, MVT::v2i64,11},
|
||||
{ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5"
|
||||
{ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8"
|
||||
{ISD::SMIN, MVT::v8i16, 1}, // The data reported by the IACA is "1.5"
|
||||
{ISD::UMIN, MVT::v8i16, 2}, // The data reported by the IACA is "1.8"
|
||||
{ISD::SMIN, MVT::v16i8, 3},
|
||||
{ISD::UMIN, MVT::v16i8, 3},
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE42CostTblNoPairWise[] = {
|
||||
{ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
|
||||
{ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6"
|
||||
{ISD::SMIN, MVT::v2i16, 3}, // same as sse2
|
||||
{ISD::SMIN, MVT::v4i16, 5}, // same as sse2
|
||||
{ISD::UMIN, MVT::v2i16, 5}, // same as sse2
|
||||
{ISD::UMIN, MVT::v4i16, 7}, // same as sse2
|
||||
{ISD::SMIN, MVT::v8i16, 4}, // phminposuw+xor
|
||||
{ISD::UMIN, MVT::v8i16, 4}, // FIXME: umin is cheaper than umax
|
||||
{ISD::SMIN, MVT::v2i8, 3}, // pminsb
|
||||
{ISD::SMIN, MVT::v4i8, 5}, // pminsb
|
||||
{ISD::SMIN, MVT::v8i8, 7}, // pminsb
|
||||
{ISD::SMIN, MVT::v16i8, 6},
|
||||
{ISD::UMIN, MVT::v2i8, 3}, // same as sse2
|
||||
{ISD::UMIN, MVT::v4i8, 5}, // same as sse2
|
||||
{ISD::UMIN, MVT::v8i8, 7}, // same as sse2
|
||||
{ISD::UMIN, MVT::v16i8, 6}, // FIXME: umin is cheaper than umax
|
||||
};
|
||||
|
||||
static const CostTblEntry AVX1CostTblNoPairWise[] = {
|
||||
{ISD::FMINNUM, MVT::v4f32, 1},
|
||||
{ISD::FMINNUM, MVT::v4f64, 1},
|
||||
{ISD::FMINNUM, MVT::v8f32, 1},
|
||||
{ISD::SMIN, MVT::v2i64, 3},
|
||||
{ISD::UMIN, MVT::v2i64, 3},
|
||||
{ISD::SMIN, MVT::v4i32, 1},
|
||||
{ISD::UMIN, MVT::v4i32, 1},
|
||||
{ISD::SMIN, MVT::v8i16, 1},
|
||||
{ISD::UMIN, MVT::v8i16, 1},
|
||||
{ISD::SMIN, MVT::v16i8, 2},
|
||||
{ISD::UMIN, MVT::v16i8, 2},
|
||||
{ISD::SMIN, MVT::v4i64, 7},
|
||||
{ISD::UMIN, MVT::v4i64, 7},
|
||||
{ISD::SMIN, MVT::v8i32, 2},
|
||||
{ISD::UMIN, MVT::v8i32, 2},
|
||||
{ISD::SMIN, MVT::v16i16, 2},
|
||||
{ISD::UMIN, MVT::v16i16, 2},
|
||||
{ISD::SMIN, MVT::v32i8, 2},
|
||||
{ISD::UMIN, MVT::v32i8, 2},
|
||||
{ISD::SMIN, MVT::v16i16, 6},
|
||||
{ISD::UMIN, MVT::v16i16, 6}, // FIXME: umin is cheaper than umax
|
||||
{ISD::SMIN, MVT::v32i8, 8},
|
||||
{ISD::UMIN, MVT::v32i8, 8},
|
||||
};
|
||||
|
||||
static const CostTblEntry AVX2CostTblNoPairWise[] = {
|
||||
{ISD::SMIN, MVT::v4i64, 1},
|
||||
{ISD::UMIN, MVT::v4i64, 1},
|
||||
{ISD::SMIN, MVT::v8i32, 1},
|
||||
{ISD::UMIN, MVT::v8i32, 1},
|
||||
{ISD::SMIN, MVT::v16i16, 1},
|
||||
{ISD::UMIN, MVT::v16i16, 1},
|
||||
{ISD::SMIN, MVT::v32i8, 1},
|
||||
{ISD::UMIN, MVT::v32i8, 1},
|
||||
static const CostTblEntry AVX512BWCostTblNoPairWise[] = {
|
||||
{ISD::SMIN, MVT::v32i16, 8},
|
||||
{ISD::UMIN, MVT::v32i16, 8}, // FIXME: umin is cheaper than umax
|
||||
{ISD::SMIN, MVT::v64i8, 10},
|
||||
{ISD::UMIN, MVT::v64i8, 10},
|
||||
};
|
||||
|
||||
static const CostTblEntry AVX512CostTblNoPairWise[] = {
|
||||
{ISD::FMINNUM, MVT::v8f64, 1},
|
||||
{ISD::FMINNUM, MVT::v16f32, 2},
|
||||
{ISD::SMIN, MVT::v8i64, 1},
|
||||
{ISD::UMIN, MVT::v8i64, 1},
|
||||
{ISD::SMIN, MVT::v16i32, 1},
|
||||
{ISD::UMIN, MVT::v16i32, 1},
|
||||
};
|
||||
|
||||
if (IsPairwise) {
|
||||
if (ST->hasAVX512())
|
||||
if (const auto *Entry = CostTableLookup(AVX512CostTblPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasAVX2())
|
||||
if (const auto *Entry = CostTableLookup(AVX2CostTblPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasAVX())
|
||||
if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasSSE42())
|
||||
if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasSSE41())
|
||||
if (const auto *Entry = CostTableLookup(SSE41CostTblPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasSSE2())
|
||||
if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasSSE1())
|
||||
if (const auto *Entry = CostTableLookup(SSE1CostTblPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
} else {
|
||||
if (ST->hasAVX512())
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(AVX512CostTblNoPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasAVX2())
|
||||
if (const auto *Entry = CostTableLookup(AVX2CostTblNoPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
// Before legalizing the type, give a chance to look up illegal narrow types
|
||||
// in the table.
|
||||
// FIXME: Is there a better way to do this?
|
||||
EVT VT = TLI->getValueType(DL, ValTy);
|
||||
if (VT.isSimple()) {
|
||||
MVT MTy = VT.getSimpleVT();
|
||||
if (ST->hasBWI())
|
||||
if (const auto *Entry = CostTableLookup(AVX512BWCostTblNoPairWise, ISD, MTy))
|
||||
return Entry->Cost;
|
||||
|
||||
if (ST->hasAVX())
|
||||
if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasSSE42())
|
||||
if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
return Entry->Cost;
|
||||
|
||||
if (ST->hasSSE41())
|
||||
if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
return Entry->Cost;
|
||||
|
||||
if (ST->hasSSE2())
|
||||
if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasSSE1())
|
||||
if (const auto *Entry = CostTableLookup(SSE1CostTblNoPairWise, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
return Entry->Cost;
|
||||
}
|
||||
|
||||
unsigned NumVecElts = ValTy->getVectorNumElements();
|
||||
|
||||
Type *Ty = ValTy;
|
||||
unsigned MinMaxCost = 0;
|
||||
if (LT.first != 1 && MTy.isVector() &&
|
||||
MTy.getVectorNumElements() < ValTy->getVectorNumElements()) {
|
||||
// Type needs to be split. We need LT.first - 1 operations ops.
|
||||
Ty = VectorType::get(ValTy->getVectorElementType(),
|
||||
MTy.getVectorNumElements());
|
||||
Type *SubCondTy = VectorType::get(CondTy->getVectorElementType(),
|
||||
MTy.getVectorNumElements());
|
||||
MinMaxCost = getMinMaxCost(Ty, SubCondTy, IsUnsigned);
|
||||
MinMaxCost *= LT.first - 1;
|
||||
NumVecElts = MTy.getVectorNumElements();
|
||||
}
|
||||
|
||||
if (ST->hasBWI())
|
||||
if (const auto *Entry = CostTableLookup(AVX512BWCostTblNoPairWise, ISD, MTy))
|
||||
return MinMaxCost + Entry->Cost;
|
||||
|
||||
if (ST->hasAVX())
|
||||
if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
|
||||
return MinMaxCost + Entry->Cost;
|
||||
|
||||
if (ST->hasSSE41())
|
||||
if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy))
|
||||
return MinMaxCost + Entry->Cost;
|
||||
|
||||
if (ST->hasSSE2())
|
||||
if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
|
||||
return MinMaxCost + Entry->Cost;
|
||||
|
||||
unsigned ScalarSize = ValTy->getScalarSizeInBits();
|
||||
|
||||
// Special case power of 2 reductions where the scalar type isn't changed
|
||||
// by type legalization.
|
||||
if (!isPowerOf2_32(ValTy->getVectorNumElements()) ||
|
||||
ScalarSize != MTy.getScalarSizeInBits())
|
||||
return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned);
|
||||
|
||||
// Now handle reduction with the legal type, taking into account size changes
|
||||
// at each level.
|
||||
while (NumVecElts > 1) {
|
||||
// Determine the size of the remaining vector we need to reduce.
|
||||
unsigned Size = NumVecElts * ScalarSize;
|
||||
NumVecElts /= 2;
|
||||
// If we're reducing from 256/512 bits, use an extract_subvector.
|
||||
if (Size > 128) {
|
||||
Type *SubTy = VectorType::get(ValTy->getVectorElementType(), NumVecElts);
|
||||
MinMaxCost +=
|
||||
getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy);
|
||||
Ty = SubTy;
|
||||
} else if (Size == 128) {
|
||||
// Reducing from 128 bits is a permute of v2f64/v2i64.
|
||||
Type *ShufTy;
|
||||
if (ValTy->isFloatingPointTy())
|
||||
ShufTy = VectorType::get(Type::getDoubleTy(ValTy->getContext()), 2);
|
||||
else
|
||||
ShufTy = VectorType::get(Type::getInt64Ty(ValTy->getContext()), 2);
|
||||
MinMaxCost +=
|
||||
getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
|
||||
} else if (Size == 64) {
|
||||
// Reducing from 64 bits is a shuffle of v4f32/v4i32.
|
||||
Type *ShufTy;
|
||||
if (ValTy->isFloatingPointTy())
|
||||
ShufTy = VectorType::get(Type::getFloatTy(ValTy->getContext()), 4);
|
||||
else
|
||||
ShufTy = VectorType::get(Type::getInt32Ty(ValTy->getContext()), 4);
|
||||
MinMaxCost +=
|
||||
getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
|
||||
} else {
|
||||
// Reducing from smaller size is a shift by immediate.
|
||||
Type *ShiftTy = VectorType::get(
|
||||
Type::getIntNTy(ValTy->getContext(), Size), 128 / Size);
|
||||
MinMaxCost += getArithmeticInstrCost(
|
||||
Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue,
|
||||
TargetTransformInfo::OK_UniformConstantValue,
|
||||
TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
|
||||
}
|
||||
|
||||
// Add the arithmetic op for this level.
|
||||
Type *SubCondTy = VectorType::get(CondTy->getVectorElementType(),
|
||||
Ty->getVectorNumElements());
|
||||
MinMaxCost += getMinMaxCost(Ty, SubCondTy, IsUnsigned);
|
||||
}
|
||||
|
||||
// Add the final extract element to the cost.
|
||||
return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
|
||||
}
|
||||
|
||||
/// Calculate the cost of materializing a 64-bit value. This helper
|
||||
|
|
|
@ -156,6 +156,8 @@ public:
|
|||
int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
|
||||
bool IsPairwiseForm);
|
||||
|
||||
int getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);
|
||||
|
||||
int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
|
||||
bool IsUnsigned);
|
||||
|
||||
|
|
|
@ -12,26 +12,26 @@
|
|||
define i32 @reduce_f64(i32 %arg) {
|
||||
; SSE-LABEL: 'reduce_f64'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'reduce_f64'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'reduce_f64'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V1 = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> undef)
|
||||
|
@ -43,49 +43,31 @@ define i32 @reduce_f64(i32 %arg) {
|
|||
}
|
||||
|
||||
define i32 @reduce_f32(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_f32'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_f32'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_f32'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
; SSE-LABEL: 'reduce_f32'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'reduce_f32'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'reduce_f32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
|
||||
|
|
|
@ -12,26 +12,26 @@
|
|||
define i32 @reduce_f64(i32 %arg) {
|
||||
; SSE-LABEL: 'reduce_f64'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'reduce_f64'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'reduce_f64'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V1 = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> undef)
|
||||
|
@ -43,49 +43,31 @@ define i32 @reduce_f64(i32 %arg) {
|
|||
}
|
||||
|
||||
define i32 @reduce_f32(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_f32'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_f32'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_f32'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
; SSE-LABEL: 'reduce_f32'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'reduce_f32'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'reduce_f32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
|
||||
|
|
|
@ -12,58 +12,58 @@
|
|||
define i32 @reduce_i64(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i64'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i64'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE41-LABEL: 'reduce_i64'
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'reduce_i64'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i64'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'reduce_i64'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
|
||||
|
@ -77,50 +77,50 @@ define i32 @reduce_i64(i32 %arg) {
|
|||
define i32 @reduce_i32(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i32'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i32'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_i32'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i32'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i32'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'reduce_i32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
|
@ -133,75 +133,75 @@ define i32 @reduce_i32(i32 %arg) {
|
|||
|
||||
define i32 @reduce_i16(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i16'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_i16'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i16'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i16'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512F-LABEL: 'reduce_i16'
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512BW-LABEL: 'reduce_i16'
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512DQ-LABEL: 'reduce_i16'
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
|
@ -215,83 +215,83 @@ define i32 @reduce_i16(i32 %arg) {
|
|||
|
||||
define i32 @reduce_i8(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i8'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_i8'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i8'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i8'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512F-LABEL: 'reduce_i8'
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512BW-LABEL: 'reduce_i8'
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512DQ-LABEL: 'reduce_i8'
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
|
|
|
@ -12,58 +12,58 @@
|
|||
define i32 @reduce_i64(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i64'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i64'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE41-LABEL: 'reduce_i64'
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'reduce_i64'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i64'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'reduce_i64'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
|
||||
|
@ -77,50 +77,50 @@ define i32 @reduce_i64(i32 %arg) {
|
|||
define i32 @reduce_i32(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i32'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i32'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_i32'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i32'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i32'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'reduce_i32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
|
@ -133,75 +133,75 @@ define i32 @reduce_i32(i32 %arg) {
|
|||
|
||||
define i32 @reduce_i16(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i16'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_i16'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i16'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i16'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512F-LABEL: 'reduce_i16'
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512BW-LABEL: 'reduce_i16'
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512DQ-LABEL: 'reduce_i16'
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
|
@ -215,83 +215,83 @@ define i32 @reduce_i16(i32 %arg) {
|
|||
|
||||
define i32 @reduce_i8(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i8'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_i8'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i8'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i8'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512F-LABEL: 'reduce_i8'
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512BW-LABEL: 'reduce_i8'
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512DQ-LABEL: 'reduce_i8'
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
|
|
|
@ -12,58 +12,58 @@
|
|||
define i32 @reduce_i64(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i64'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i64'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE41-LABEL: 'reduce_i64'
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'reduce_i64'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i64'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'reduce_i64'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
|
||||
|
@ -76,51 +76,51 @@ define i32 @reduce_i64(i32 %arg) {
|
|||
|
||||
define i32 @reduce_i32(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i32'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i32'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_i32'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i32'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i32'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'reduce_i32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
|
@ -133,75 +133,75 @@ define i32 @reduce_i32(i32 %arg) {
|
|||
|
||||
define i32 @reduce_i16(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i16'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_i16'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i16'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i16'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512F-LABEL: 'reduce_i16'
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512BW-LABEL: 'reduce_i16'
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512DQ-LABEL: 'reduce_i16'
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
|
@ -215,83 +215,83 @@ define i32 @reduce_i16(i32 %arg) {
|
|||
|
||||
define i32 @reduce_i8(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i8'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_i8'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i8'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i8'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512F-LABEL: 'reduce_i8'
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512BW-LABEL: 'reduce_i8'
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512DQ-LABEL: 'reduce_i8'
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
|
|
|
@ -12,58 +12,58 @@
|
|||
define i32 @reduce_i64(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i64'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i64'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE41-LABEL: 'reduce_i64'
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'reduce_i64'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i64'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'reduce_i64'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
|
||||
|
@ -76,51 +76,51 @@ define i32 @reduce_i64(i32 %arg) {
|
|||
|
||||
define i32 @reduce_i32(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i32'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i32'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_i32'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i32'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i32'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'reduce_i32'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
|
@ -133,75 +133,75 @@ define i32 @reduce_i32(i32 %arg) {
|
|||
|
||||
define i32 @reduce_i16(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i16'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_i16'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i16'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i16'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512F-LABEL: 'reduce_i16'
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512BW-LABEL: 'reduce_i16'
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512DQ-LABEL: 'reduce_i16'
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
|
@ -215,83 +215,83 @@ define i32 @reduce_i16(i32 %arg) {
|
|||
|
||||
define i32 @reduce_i8(i32 %arg) {
|
||||
; SSE2-LABEL: 'reduce_i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSSE3-LABEL: 'reduce_i8'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE4-LABEL: 'reduce_i8'
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'reduce_i8'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX2-LABEL: 'reduce_i8'
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512F-LABEL: 'reduce_i8'
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512BW-LABEL: 'reduce_i8'
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512DQ-LABEL: 'reduce_i8'
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX1
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX2
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX,AVX1
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX,AVX2
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,THRESH
|
||||
|
||||
@arr = local_unnamed_addr global [32 x i32] zeroinitializer, align 16
|
||||
|
@ -460,30 +460,57 @@ define float @maxf32(float) {
|
|||
}
|
||||
|
||||
define i32 @maxi8_mutiple_uses(i32) {
|
||||
; DEFAULT-LABEL: @maxi8_mutiple_uses(
|
||||
; DEFAULT-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
|
||||
; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
|
||||
; DEFAULT-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
|
||||
; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
|
||||
; DEFAULT-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
|
||||
; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
|
||||
; DEFAULT-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; DEFAULT-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
|
||||
; DEFAULT-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
|
||||
; DEFAULT-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; DEFAULT-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; DEFAULT-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
|
||||
; DEFAULT-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
|
||||
; DEFAULT-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
|
||||
; DEFAULT-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]]
|
||||
; DEFAULT-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
|
||||
; DEFAULT-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]]
|
||||
; DEFAULT-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
|
||||
; DEFAULT-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
|
||||
; DEFAULT-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]]
|
||||
; DEFAULT-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
|
||||
; DEFAULT-NEXT: store i32 [[TMP15]], i32* @var, align 8
|
||||
; DEFAULT-NEXT: ret i32 [[TMP14]]
|
||||
; SSE-LABEL: @maxi8_mutiple_uses(
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
|
||||
; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
|
||||
; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
|
||||
; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
|
||||
; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
|
||||
; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
|
||||
; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
|
||||
; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
|
||||
; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
|
||||
; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
|
||||
; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
|
||||
; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
|
||||
; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
|
||||
; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
|
||||
; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
|
||||
; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
|
||||
; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
|
||||
; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
|
||||
; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
|
||||
; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
|
||||
; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
|
||||
; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
|
||||
; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4
|
||||
; SSE-NEXT: store i32 [[TMP24]], i32* @var, align 8
|
||||
; SSE-NEXT: ret i32 [[TMP23]]
|
||||
;
|
||||
; AVX-LABEL: @maxi8_mutiple_uses(
|
||||
; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
|
||||
; AVX-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
|
||||
; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
|
||||
; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
|
||||
; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
|
||||
; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
|
||||
; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
|
||||
; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
|
||||
; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
|
||||
; AVX-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
|
||||
; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
|
||||
; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]]
|
||||
; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
|
||||
; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]]
|
||||
; AVX-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
|
||||
; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
|
||||
; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]]
|
||||
; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
|
||||
; AVX-NEXT: store i32 [[TMP15]], i32* @var, align 8
|
||||
; AVX-NEXT: ret i32 [[TMP14]]
|
||||
;
|
||||
; THRESH-LABEL: @maxi8_mutiple_uses(
|
||||
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
|
||||
|
@ -544,30 +571,57 @@ define i32 @maxi8_mutiple_uses(i32) {
|
|||
}
|
||||
|
||||
define i32 @maxi8_wrong_parent(i32) {
|
||||
; DEFAULT-LABEL: @maxi8_wrong_parent(
|
||||
; DEFAULT-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
|
||||
; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
|
||||
; DEFAULT-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
|
||||
; DEFAULT-NEXT: br label [[PP:%.*]]
|
||||
; DEFAULT: pp:
|
||||
; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
|
||||
; DEFAULT-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
|
||||
; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
|
||||
; DEFAULT-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
|
||||
; DEFAULT-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; DEFAULT-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
|
||||
; DEFAULT-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
|
||||
; DEFAULT-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; DEFAULT-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; DEFAULT-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
|
||||
; DEFAULT-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
|
||||
; DEFAULT-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
|
||||
; DEFAULT-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
|
||||
; DEFAULT-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
|
||||
; DEFAULT-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
|
||||
; DEFAULT-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
|
||||
; DEFAULT-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]]
|
||||
; DEFAULT-NEXT: ret i32 [[OP_EXTRA]]
|
||||
; SSE-LABEL: @maxi8_wrong_parent(
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
|
||||
; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
|
||||
; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
|
||||
; SSE-NEXT: br label [[PP:%.*]]
|
||||
; SSE: pp:
|
||||
; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
|
||||
; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
|
||||
; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
|
||||
; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
|
||||
; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
|
||||
; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
|
||||
; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
|
||||
; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
|
||||
; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
|
||||
; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
|
||||
; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
|
||||
; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
|
||||
; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
|
||||
; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
|
||||
; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
|
||||
; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
|
||||
; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
|
||||
; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
|
||||
; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
|
||||
; SSE-NEXT: ret i32 [[TMP23]]
|
||||
;
|
||||
; AVX-LABEL: @maxi8_wrong_parent(
|
||||
; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
|
||||
; AVX-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
|
||||
; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
|
||||
; AVX-NEXT: br label [[PP:%.*]]
|
||||
; AVX: pp:
|
||||
; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
|
||||
; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
|
||||
; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
|
||||
; AVX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
|
||||
; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
|
||||
; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
|
||||
; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
|
||||
; AVX-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
|
||||
; AVX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
|
||||
; AVX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
|
||||
; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
|
||||
; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
|
||||
; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
|
||||
; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]]
|
||||
; AVX-NEXT: ret i32 [[OP_EXTRA]]
|
||||
;
|
||||
; THRESH-LABEL: @maxi8_wrong_parent(
|
||||
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
|
||||
|
|
|
@ -5,30 +5,31 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4
|
|||
; CHECK-LABEL: @foo(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <2 x i32>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[REORDER_SHUFFLE]], <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[A1:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[A1:%.*]]
|
||||
; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP0]], [[A2:%.*]]
|
||||
; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[TMP0]], [[A3:%.*]]
|
||||
; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP0]], [[A4:%.*]]
|
||||
; CHECK-NEXT: [[ADD8:%.*]] = add i32 [[TMP0]], [[A5:%.*]]
|
||||
; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[TMP0]], [[A6:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARR]], align 4
|
||||
; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[TMP1]], [[A7:%.*]]
|
||||
; CHECK-NEXT: [[ADD14:%.*]] = add i32 [[TMP1]], [[A8:%.*]]
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[ADD2]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[ADD2]]
|
||||
; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], [[ADD4]]
|
||||
; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 [[ADD4]]
|
||||
; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], [[ADD6]]
|
||||
; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 [[ADD6]]
|
||||
; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], [[ADD8]]
|
||||
; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 [[ADD8]]
|
||||
; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], [[ADD10]]
|
||||
; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 [[ADD10]]
|
||||
; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], [[ADD12]]
|
||||
; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 [[ADD12]]
|
||||
; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], [[ADD14]]
|
||||
; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 [[ADD14]]
|
||||
; CHECK-NEXT: ret i32 [[COND44]]
|
||||
;
|
||||
entry:
|
||||
%arrayidx = getelementptr inbounds i32, i32* %arr, i64 1
|
||||
|
@ -63,32 +64,35 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
|
|||
; CHECK-LABEL: @foo1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[A1:%.*]]
|
||||
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 2
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
|
||||
; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP1]], [[A2:%.*]]
|
||||
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[REORDER_SHUFFLE]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 3, i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[A1:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
|
||||
; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[TMP2]], [[A3:%.*]]
|
||||
; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP0]], [[A4:%.*]]
|
||||
; CHECK-NEXT: [[ADD8:%.*]] = add i32 [[TMP0]], [[A5:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARR]], align 4
|
||||
; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[TMP3]], [[A6:%.*]]
|
||||
; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[TMP1]], [[A7:%.*]]
|
||||
; CHECK-NEXT: [[ADD14:%.*]] = add i32 [[TMP0]], [[A8:%.*]]
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[ADD2]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[ADD2]]
|
||||
; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], [[ADD4]]
|
||||
; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 [[ADD4]]
|
||||
; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], [[ADD6]]
|
||||
; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 [[ADD6]]
|
||||
; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], [[ADD8]]
|
||||
; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 [[ADD8]]
|
||||
; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], [[ADD10]]
|
||||
; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 [[ADD10]]
|
||||
; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], [[ADD12]]
|
||||
; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 [[ADD12]]
|
||||
; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], [[ADD14]]
|
||||
; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 [[ADD14]]
|
||||
; CHECK-NEXT: ret i32 [[COND44]]
|
||||
;
|
||||
entry:
|
||||
%arrayidx = getelementptr inbounds i32, i32* %arr, i64 1
|
||||
|
@ -127,32 +131,35 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
|
|||
; CHECK-LABEL: @foo2(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[A1:%.*]]
|
||||
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 2
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
|
||||
; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP1]], [[A2:%.*]]
|
||||
; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[TMP0]], [[A3:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARR]], align 4
|
||||
; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP2]], [[A4:%.*]]
|
||||
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[REORDER_SHUFFLE]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 2, i32 3, i32 2, i32 1, i32 3>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[A1:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
|
||||
; CHECK-NEXT: [[ADD8:%.*]] = add i32 [[TMP3]], [[A5:%.*]]
|
||||
; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[TMP2]], [[A6:%.*]]
|
||||
; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[TMP1]], [[A7:%.*]]
|
||||
; CHECK-NEXT: [[ADD14:%.*]] = add i32 [[TMP3]], [[A8:%.*]]
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[ADD2]]
|
||||
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[ADD2]]
|
||||
; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], [[ADD4]]
|
||||
; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 [[ADD4]]
|
||||
; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], [[ADD6]]
|
||||
; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 [[ADD6]]
|
||||
; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], [[ADD8]]
|
||||
; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 [[ADD8]]
|
||||
; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], [[ADD10]]
|
||||
; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 [[ADD10]]
|
||||
; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], [[ADD12]]
|
||||
; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 [[ADD12]]
|
||||
; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], [[ADD14]]
|
||||
; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 [[ADD14]]
|
||||
; CHECK-NEXT: ret i32 [[COND44]]
|
||||
;
|
||||
entry:
|
||||
%arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
|
||||
|
|
Loading…
Reference in New Issue