forked from OSchip/llvm-project
[CostModel][X86] Find AVX conversion costs using legalized types if custom types didn't match
Building on rG2a1ef8784ad9a, fallback to attempting to match against legalized types like we do for SSE targets.
This commit is contained in:
parent
02ef0f5ab4
commit
2aecffcd40
|
@ -1955,6 +1955,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 },
|
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 },
|
||||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
|
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
|
||||||
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 },
|
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 },
|
||||||
|
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 5 },
|
||||||
|
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 8 },
|
||||||
|
|
||||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
|
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
|
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
|
||||||
|
@ -1966,6 +1968,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
|
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
|
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 4 },
|
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 4 },
|
||||||
|
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 10 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
|
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
|
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 18 },
|
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 18 },
|
||||||
|
@ -2242,17 +2245,58 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fall back to legalized types.
|
// Fall back to legalized types.
|
||||||
// TODO: Add AVX support.
|
|
||||||
std::pair<InstructionCost, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
|
std::pair<InstructionCost, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
|
||||||
std::pair<InstructionCost, MVT> LTDest =
|
std::pair<InstructionCost, MVT> LTDest =
|
||||||
TLI->getTypeLegalizationCost(DL, Dst);
|
TLI->getTypeLegalizationCost(DL, Dst);
|
||||||
|
|
||||||
if (ST->hasSSE41() && !ST->hasAVX())
|
if (ST->useAVX512Regs()) {
|
||||||
|
if (ST->hasBWI())
|
||||||
|
if (const auto *Entry = ConvertCostTableLookup(
|
||||||
|
AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
|
||||||
|
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
||||||
|
|
||||||
|
if (ST->hasDQI())
|
||||||
|
if (const auto *Entry = ConvertCostTableLookup(
|
||||||
|
AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
|
||||||
|
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
||||||
|
|
||||||
|
if (ST->hasAVX512())
|
||||||
|
if (const auto *Entry = ConvertCostTableLookup(
|
||||||
|
AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
|
||||||
|
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ST->hasBWI())
|
||||||
|
if (const auto *Entry = ConvertCostTableLookup(AVX512BWVLConversionTbl, ISD,
|
||||||
|
LTDest.second, LTSrc.second))
|
||||||
|
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
||||||
|
|
||||||
|
if (ST->hasDQI())
|
||||||
|
if (const auto *Entry = ConvertCostTableLookup(AVX512DQVLConversionTbl, ISD,
|
||||||
|
LTDest.second, LTSrc.second))
|
||||||
|
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
||||||
|
|
||||||
|
if (ST->hasAVX512())
|
||||||
|
if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD,
|
||||||
|
LTDest.second, LTSrc.second))
|
||||||
|
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
||||||
|
|
||||||
|
if (ST->hasAVX2())
|
||||||
|
if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
|
||||||
|
LTDest.second, LTSrc.second))
|
||||||
|
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
||||||
|
|
||||||
|
if (ST->hasAVX())
|
||||||
|
if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
|
||||||
|
LTDest.second, LTSrc.second))
|
||||||
|
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
||||||
|
|
||||||
|
if (ST->hasSSE41())
|
||||||
if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
|
if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
|
||||||
LTDest.second, LTSrc.second))
|
LTDest.second, LTSrc.second))
|
||||||
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
||||||
|
|
||||||
if (ST->hasSSE2() && !ST->hasAVX())
|
if (ST->hasSSE2())
|
||||||
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
|
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
|
||||||
LTDest.second, LTSrc.second))
|
LTDest.second, LTSrc.second))
|
||||||
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
||||||
|
|
|
@ -239,7 +239,7 @@ define i32 @fptosi_float_i32(i32 %arg) {
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: 'fptosi_float_i32'
|
; AVX2-LABEL: 'fptosi_float_i32'
|
||||||
|
|
|
@ -85,21 +85,21 @@ define i32 @sitofp_i32_double() {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: 'sitofp_i32_double'
|
; AVX1-LABEL: 'sitofp_i32_double'
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: 'sitofp_i32_double'
|
; AVX2-LABEL: 'sitofp_i32_double'
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: 'sitofp_i32_double'
|
; AVX512-LABEL: 'sitofp_i32_double'
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
|
@ -158,7 +158,7 @@ define i32 @sitofp_i8_float() {
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'sitofp_i8_float'
|
; AVX-LABEL: 'sitofp_i8_float'
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
|
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
|
||||||
|
@ -166,7 +166,7 @@ define i32 @sitofp_i8_float() {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: 'sitofp_i8_float'
|
; AVX512-LABEL: 'sitofp_i8_float'
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
|
||||||
|
@ -191,7 +191,7 @@ define i32 @sitofp_i16_float() {
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'sitofp_i16_float'
|
; AVX-LABEL: 'sitofp_i16_float'
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
|
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
|
||||||
|
@ -199,7 +199,7 @@ define i32 @sitofp_i16_float() {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: 'sitofp_i16_float'
|
; AVX512-LABEL: 'sitofp_i16_float'
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
|
||||||
|
@ -232,7 +232,7 @@ define i32 @sitofp_i32_float() {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: 'sitofp_i32_float'
|
; AVX1-LABEL: 'sitofp_i32_float'
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
||||||
|
@ -240,7 +240,7 @@ define i32 @sitofp_i32_float() {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: 'sitofp_i32_float'
|
; AVX2-LABEL: 'sitofp_i32_float'
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
||||||
|
@ -248,7 +248,7 @@ define i32 @sitofp_i32_float() {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: 'sitofp_i32_float'
|
; AVX512-LABEL: 'sitofp_i32_float'
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
||||||
|
@ -273,16 +273,16 @@ define i32 @sitofp_i64_float() {
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'sitofp_i64_float'
|
; AVX-LABEL: 'sitofp_i64_float'
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
|
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: 'sitofp_i64_float'
|
; AVX512F-LABEL: 'sitofp_i64_float'
|
||||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
|
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
|
||||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
|
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
|
||||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
|
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
|
||||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
|
; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
|
||||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
|
; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
|
||||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
|
|
|
@ -224,7 +224,7 @@ define i32 @uitofp_i32_float() {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: 'uitofp_i32_float'
|
; AVX1-LABEL: 'uitofp_i32_float'
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
|
||||||
|
|
|
@ -216,14 +216,20 @@ define void @sitofp_8i64_8f64() #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sitofp_2i32_2f64() #0 {
|
define void @sitofp_2i32_2f64() #0 {
|
||||||
; CHECK-LABEL: @sitofp_2i32_2f64(
|
; SSE-LABEL: @sitofp_2i32_2f64(
|
||||||
; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||||
; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||||
; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
||||||
; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
||||||
; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||||
; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||||
; CHECK-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
|
;
|
||||||
|
; AVX-LABEL: @sitofp_2i32_2f64(
|
||||||
|
; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||||
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||||
|
; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||||
|
; AVX-NEXT: ret void
|
||||||
;
|
;
|
||||||
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||||
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||||
|
@ -632,32 +638,11 @@ define void @sitofp_4i64_4f32() #0 {
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
||||||
; SSE-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
;
|
;
|
||||||
; AVX256NODQ-LABEL: @sitofp_4i64_4f32(
|
; AVX-LABEL: @sitofp_4i64_4f32(
|
||||||
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
||||||
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
||||||
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
; AVX-NEXT: ret void
|
||||||
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; AVX256NODQ-NEXT: ret void
|
|
||||||
;
|
|
||||||
; AVX512-LABEL: @sitofp_4i64_4f32(
|
|
||||||
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
|
||||||
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
|
||||||
; AVX512-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
||||||
; AVX512-NEXT: ret void
|
|
||||||
;
|
|
||||||
; AVX256DQ-LABEL: @sitofp_4i64_4f32(
|
|
||||||
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
|
||||||
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
|
||||||
; AVX256DQ-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
||||||
; AVX256DQ-NEXT: ret void
|
|
||||||
;
|
;
|
||||||
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||||
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||||
|
@ -702,44 +687,11 @@ define void @sitofp_8i64_8f32() #0 {
|
||||||
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
||||||
; SSE-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
;
|
;
|
||||||
; AVX256NODQ-LABEL: @sitofp_8i64_8f32(
|
; AVX-LABEL: @sitofp_8i64_8f32(
|
||||||
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
|
||||||
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
|
||||||
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
||||||
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
; AVX-NEXT: ret void
|
||||||
; AVX256NODQ-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
|
|
||||||
; AVX256NODQ-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
|
|
||||||
; AVX256NODQ-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
|
|
||||||
; AVX256NODQ-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
|
|
||||||
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to float
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
||||||
; AVX256NODQ-NEXT: ret void
|
|
||||||
;
|
|
||||||
; AVX512-LABEL: @sitofp_8i64_8f32(
|
|
||||||
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
|
|
||||||
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
|
|
||||||
; AVX512-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
|
||||||
; AVX512-NEXT: ret void
|
|
||||||
;
|
|
||||||
; AVX256DQ-LABEL: @sitofp_8i64_8f32(
|
|
||||||
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
|
|
||||||
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
|
|
||||||
; AVX256DQ-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
|
||||||
; AVX256DQ-NEXT: ret void
|
|
||||||
;
|
;
|
||||||
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||||
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||||
|
|
|
@ -216,14 +216,20 @@ define void @sitofp_8i64_8f64() #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sitofp_2i32_2f64() #0 {
|
define void @sitofp_2i32_2f64() #0 {
|
||||||
; CHECK-LABEL: @sitofp_2i32_2f64(
|
; SSE-LABEL: @sitofp_2i32_2f64(
|
||||||
; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||||
; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||||
; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
||||||
; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
||||||
; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||||
; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||||
; CHECK-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
|
;
|
||||||
|
; AVX-LABEL: @sitofp_2i32_2f64(
|
||||||
|
; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||||
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||||
|
; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||||
|
; AVX-NEXT: ret void
|
||||||
;
|
;
|
||||||
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||||
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||||
|
@ -632,32 +638,11 @@ define void @sitofp_4i64_4f32() #0 {
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
||||||
; SSE-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
;
|
;
|
||||||
; AVX256NODQ-LABEL: @sitofp_4i64_4f32(
|
; AVX-LABEL: @sitofp_4i64_4f32(
|
||||||
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
||||||
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
||||||
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
; AVX-NEXT: ret void
|
||||||
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; AVX256NODQ-NEXT: ret void
|
|
||||||
;
|
|
||||||
; AVX512-LABEL: @sitofp_4i64_4f32(
|
|
||||||
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
|
||||||
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
|
||||||
; AVX512-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
||||||
; AVX512-NEXT: ret void
|
|
||||||
;
|
|
||||||
; AVX256DQ-LABEL: @sitofp_4i64_4f32(
|
|
||||||
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
|
||||||
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
|
||||||
; AVX256DQ-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
||||||
; AVX256DQ-NEXT: ret void
|
|
||||||
;
|
;
|
||||||
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||||
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||||
|
@ -702,44 +687,11 @@ define void @sitofp_8i64_8f32() #0 {
|
||||||
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
||||||
; SSE-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
;
|
;
|
||||||
; AVX256NODQ-LABEL: @sitofp_8i64_8f32(
|
; AVX-LABEL: @sitofp_8i64_8f32(
|
||||||
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
|
||||||
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
|
||||||
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
||||||
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
; AVX-NEXT: ret void
|
||||||
; AVX256NODQ-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
|
|
||||||
; AVX256NODQ-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
|
|
||||||
; AVX256NODQ-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
|
|
||||||
; AVX256NODQ-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
|
|
||||||
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to float
|
|
||||||
; AVX256NODQ-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to float
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
||||||
; AVX256NODQ-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
||||||
; AVX256NODQ-NEXT: ret void
|
|
||||||
;
|
|
||||||
; AVX512-LABEL: @sitofp_8i64_8f32(
|
|
||||||
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
|
|
||||||
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
|
|
||||||
; AVX512-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
|
||||||
; AVX512-NEXT: ret void
|
|
||||||
;
|
|
||||||
; AVX256DQ-LABEL: @sitofp_8i64_8f32(
|
|
||||||
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
|
|
||||||
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
|
|
||||||
; AVX256DQ-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
|
|
||||||
; AVX256DQ-NEXT: ret void
|
|
||||||
;
|
;
|
||||||
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||||
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||||
|
|
Loading…
Reference in New Issue