[CostModel][X86] Find AVX conversion costs using legalized types if custom types didn't match

Building on rG2a1ef8784ad9a, fallback to attempting to match against legalized types like we do for SSE targets.
This commit is contained in:
Simon Pilgrim 2021-07-02 13:41:27 +01:00
parent 02ef0f5ab4
commit 2aecffcd40
6 changed files with 113 additions and 165 deletions

View File

@ -1955,6 +1955,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 5 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 8 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
@ -1966,6 +1968,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 4 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 10 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 18 },
@ -2242,17 +2245,58 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
}
// Fall back to legalized types.
// TODO: Add AVX support.
std::pair<InstructionCost, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
std::pair<InstructionCost, MVT> LTDest =
TLI->getTypeLegalizationCost(DL, Dst);
if (ST->hasSSE41() && !ST->hasAVX())
if (ST->useAVX512Regs()) {
if (ST->hasBWI())
if (const auto *Entry = ConvertCostTableLookup(
AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
if (ST->hasDQI())
if (const auto *Entry = ConvertCostTableLookup(
AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
if (ST->hasAVX512())
if (const auto *Entry = ConvertCostTableLookup(
AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
}
if (ST->hasBWI())
if (const auto *Entry = ConvertCostTableLookup(AVX512BWVLConversionTbl, ISD,
LTDest.second, LTSrc.second))
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
if (ST->hasDQI())
if (const auto *Entry = ConvertCostTableLookup(AVX512DQVLConversionTbl, ISD,
LTDest.second, LTSrc.second))
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
if (ST->hasAVX512())
if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD,
LTDest.second, LTSrc.second))
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
if (ST->hasAVX2())
if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
LTDest.second, LTSrc.second))
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
if (ST->hasAVX())
if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
LTDest.second, LTSrc.second))
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
if (ST->hasSSE41())
if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
LTDest.second, LTSrc.second))
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
if (ST->hasSSE2() && !ST->hasAVX())
if (ST->hasSSE2())
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
LTDest.second, LTSrc.second))
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);

View File

@ -239,7 +239,7 @@ define i32 @fptosi_float_i32(i32 %arg) {
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'fptosi_float_i32'

View File

@ -85,21 +85,21 @@ define i32 @sitofp_i32_double() {
;
; AVX1-LABEL: 'sitofp_i32_double'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'sitofp_i32_double'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'sitofp_i32_double'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@ -158,7 +158,7 @@ define i32 @sitofp_i8_float() {
;
; AVX-LABEL: 'sitofp_i8_float'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
@ -166,7 +166,7 @@ define i32 @sitofp_i8_float() {
;
; AVX512-LABEL: 'sitofp_i8_float'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
@ -191,7 +191,7 @@ define i32 @sitofp_i16_float() {
;
; AVX-LABEL: 'sitofp_i16_float'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
@ -199,7 +199,7 @@ define i32 @sitofp_i16_float() {
;
; AVX512-LABEL: 'sitofp_i16_float'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
@ -232,7 +232,7 @@ define i32 @sitofp_i32_float() {
;
; AVX1-LABEL: 'sitofp_i32_float'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
@ -240,7 +240,7 @@ define i32 @sitofp_i32_float() {
;
; AVX2-LABEL: 'sitofp_i32_float'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
@ -248,7 +248,7 @@ define i32 @sitofp_i32_float() {
;
; AVX512-LABEL: 'sitofp_i32_float'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
@ -273,16 +273,16 @@ define i32 @sitofp_i64_float() {
;
; AVX-LABEL: 'sitofp_i64_float'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'sitofp_i64_float'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef

View File

@ -224,7 +224,7 @@ define i32 @uitofp_i32_float() {
;
; AVX1-LABEL: 'uitofp_i32_float'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>

View File

@ -216,14 +216,20 @@ define void @sitofp_8i64_8f64() #0 {
}
define void @sitofp_2i32_2f64() #0 {
; CHECK-LABEL: @sitofp_2i32_2f64(
; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; CHECK-NEXT: ret void
; SSE-LABEL: @sitofp_2i32_2f64(
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT: ret void
;
; AVX-LABEL: @sitofp_2i32_2f64(
; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; AVX-NEXT: ret void
;
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
@ -632,32 +638,11 @@ define void @sitofp_4i64_4f32() #0 {
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @sitofp_4i64_4f32(
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @sitofp_4i64_4f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
; AVX512-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @sitofp_4i64_4f32(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
; AVX256DQ-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; AVX256DQ-NEXT: ret void
; AVX-LABEL: @sitofp_4i64_4f32(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; AVX-NEXT: ret void
;
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
@ -702,44 +687,11 @@ define void @sitofp_8i64_8f32() #0 {
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @sitofp_8i64_8f32(
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
; AVX256NODQ-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
; AVX256NODQ-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
; AVX256NODQ-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to float
; AVX256NODQ-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to float
; AVX256NODQ-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to float
; AVX256NODQ-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to float
; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; AVX256NODQ-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
; AVX256NODQ-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
; AVX256NODQ-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
; AVX256NODQ-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @sitofp_8i64_8f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX512-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @sitofp_8i64_8f32(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX256DQ-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX256DQ-NEXT: ret void
; AVX-LABEL: @sitofp_8i64_8f32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX-NEXT: ret void
;
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8

View File

@ -216,14 +216,20 @@ define void @sitofp_8i64_8f64() #0 {
}
define void @sitofp_2i32_2f64() #0 {
; CHECK-LABEL: @sitofp_2i32_2f64(
; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; CHECK-NEXT: ret void
; SSE-LABEL: @sitofp_2i32_2f64(
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT: ret void
;
; AVX-LABEL: @sitofp_2i32_2f64(
; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; AVX-NEXT: ret void
;
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
@ -632,32 +638,11 @@ define void @sitofp_4i64_4f32() #0 {
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @sitofp_4i64_4f32(
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @sitofp_4i64_4f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
; AVX512-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @sitofp_4i64_4f32(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
; AVX256DQ-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; AVX256DQ-NEXT: ret void
; AVX-LABEL: @sitofp_4i64_4f32(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; AVX-NEXT: ret void
;
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
@ -702,44 +687,11 @@ define void @sitofp_8i64_8f32() #0 {
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @sitofp_8i64_8f32(
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
; AVX256NODQ-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
; AVX256NODQ-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
; AVX256NODQ-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to float
; AVX256NODQ-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to float
; AVX256NODQ-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to float
; AVX256NODQ-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to float
; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; AVX256NODQ-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
; AVX256NODQ-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
; AVX256NODQ-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
; AVX256NODQ-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @sitofp_8i64_8f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX512-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @sitofp_8i64_8f32(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX256DQ-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX256DQ-NEXT: ret void
; AVX-LABEL: @sitofp_8i64_8f32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX-NEXT: ret void
;
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8