forked from OSchip/llvm-project
[CostModel][X86] Adjust fp<->int vXi32 SSE legalized costs based on llvm-mca reports.
Building on rG2a1ef8784ad9a, adjust the SSE cost tables to use the legalized types, based on the worst-case costs from the script in D103695. To account for different numbers of src/dst legalized type registers, we must scale the cost by the maximum of the src and dst register counts, not just the src count.
This commit is contained in:
parent
4a361f5209
commit
5e5ba14b4d
|
@ -2063,9 +2063,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||||
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
|
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
|
||||||
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // PSHUFB
|
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // PSHUFB
|
||||||
|
|
||||||
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
|
|
||||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
|
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
|
||||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
|
|
||||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 1 },
|
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 1 },
|
||||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 },
|
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 },
|
||||||
|
|
||||||
|
@ -2084,24 +2082,25 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||||
// These are somewhat magic numbers justified by looking at the output of
|
// These are somewhat magic numbers justified by looking at the output of
|
||||||
// Intel's IACA, running some kernels and making sure when we take
|
// Intel's IACA, running some kernels and making sure when we take
|
||||||
// legalization into account the throughput will be overestimated.
|
// legalization into account the throughput will be overestimated.
|
||||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
|
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 3 },
|
||||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
|
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 },
|
||||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
|
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 3 },
|
||||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
|
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 4 },
|
||||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
|
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 3 },
|
||||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 2*10 },
|
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4 },
|
||||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2*10 },
|
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 8 },
|
||||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
|
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 8 },
|
||||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
|
|
||||||
|
|
||||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
|
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 8 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
|
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 9 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
|
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
|
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 4 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
|
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 4 },
|
||||||
|
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 4 },
|
||||||
|
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 7 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
|
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
|
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 15 },
|
||||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
|
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 18 },
|
||||||
|
|
||||||
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 },
|
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 },
|
||||||
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 },
|
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 },
|
||||||
|
@ -2109,14 +2108,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||||
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
|
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
|
||||||
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
|
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
|
||||||
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 },
|
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 },
|
||||||
|
|
||||||
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
|
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
|
||||||
|
|
||||||
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 6 },
|
|
||||||
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
|
|
||||||
|
|
||||||
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
|
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
|
||||||
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 },
|
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 15 },
|
||||||
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 },
|
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 },
|
||||||
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 },
|
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 },
|
||||||
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
|
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
|
||||||
|
@ -2250,12 +2245,12 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||||
if (ST->hasSSE41() && !ST->hasAVX())
|
if (ST->hasSSE41() && !ST->hasAVX())
|
||||||
if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
|
if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
|
||||||
LTDest.second, LTSrc.second))
|
LTDest.second, LTSrc.second))
|
||||||
return AdjustCost(LTSrc.first * Entry->Cost);
|
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
||||||
|
|
||||||
if (ST->hasSSE2() && !ST->hasAVX())
|
if (ST->hasSSE2() && !ST->hasAVX())
|
||||||
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
|
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
|
||||||
LTDest.second, LTSrc.second))
|
LTDest.second, LTSrc.second))
|
||||||
return AdjustCost(LTSrc.first * Entry->Cost);
|
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
|
||||||
|
|
||||||
return AdjustCost(
|
return AdjustCost(
|
||||||
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
|
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
|
||||||
|
|
|
@ -373,23 +373,23 @@ define i32 @masks4(<4 x i1> %in) {
|
||||||
|
|
||||||
define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
|
define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
|
||||||
; SSE2-LABEL: 'sitofp4'
|
; SSE2-LABEL: 'sitofp4'
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||||
;
|
;
|
||||||
; SSE41-LABEL: 'sitofp4'
|
; SSE41-LABEL: 'sitofp4'
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float>
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float>
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float>
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float>
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||||
|
@ -440,16 +440,16 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
|
||||||
|
|
||||||
define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
|
define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
|
||||||
; SSE2-LABEL: 'sitofp8'
|
; SSE2-LABEL: 'sitofp8'
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||||
;
|
;
|
||||||
; SSE41-LABEL: 'sitofp8'
|
; SSE41-LABEL: 'sitofp8'
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
|
||||||
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||||
;
|
;
|
||||||
|
@ -484,13 +484,13 @@ define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
|
||||||
define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
|
define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
|
||||||
; SSE-LABEL: 'uitofp4'
|
; SSE-LABEL: 'uitofp4'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: 'uitofp4'
|
; AVX1-LABEL: 'uitofp4'
|
||||||
|
@ -539,9 +539,9 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
|
||||||
|
|
||||||
define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
|
define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
|
||||||
; SSE-LABEL: 'uitofp8'
|
; SSE-LABEL: 'uitofp8'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||||
;
|
;
|
||||||
|
|
|
@ -12,24 +12,24 @@
|
||||||
|
|
||||||
define i32 @fptoui_double_i64(i32 %arg) {
|
define i32 @fptoui_double_i64(i32 %arg) {
|
||||||
; SSE2-LABEL: 'fptoui_double_i64'
|
; SSE2-LABEL: 'fptoui_double_i64'
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; SSE42-LABEL: 'fptoui_double_i64'
|
; SSE42-LABEL: 'fptoui_double_i64'
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'fptoui_double_i64'
|
; AVX-LABEL: 'fptoui_double_i64'
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
|
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: 'fptoui_double_i64'
|
; AVX512F-LABEL: 'fptoui_double_i64'
|
||||||
|
@ -47,10 +47,10 @@ define i32 @fptoui_double_i64(i32 %arg) {
|
||||||
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; SLM-LABEL: 'fptoui_double_i64'
|
; SLM-LABEL: 'fptoui_double_i64'
|
||||||
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
|
; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
|
||||||
; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
|
; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
|
||||||
; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
|
||||||
; SLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
|
||||||
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
%I64 = fptoui double undef to i64
|
%I64 = fptoui double undef to i64
|
||||||
|
|
|
@ -13,9 +13,9 @@
|
||||||
define i32 @sitofp_i8_double() {
|
define i32 @sitofp_i8_double() {
|
||||||
; SSE-LABEL: 'sitofp_i8_double'
|
; SSE-LABEL: 'sitofp_i8_double'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f64 = sitofp i8 undef to double
|
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f64 = sitofp i8 undef to double
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'sitofp_i8_double'
|
; AVX-LABEL: 'sitofp_i8_double'
|
||||||
|
@ -42,9 +42,9 @@ define i32 @sitofp_i8_double() {
|
||||||
define i32 @sitofp_i16_double() {
|
define i32 @sitofp_i16_double() {
|
||||||
; SSE-LABEL: 'sitofp_i16_double'
|
; SSE-LABEL: 'sitofp_i16_double'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f64 = sitofp i16 undef to double
|
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f64 = sitofp i16 undef to double
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'sitofp_i16_double'
|
; AVX-LABEL: 'sitofp_i16_double'
|
||||||
|
@ -71,35 +71,35 @@ define i32 @sitofp_i16_double() {
|
||||||
define i32 @sitofp_i32_double() {
|
define i32 @sitofp_i32_double() {
|
||||||
; SSE2-LABEL: 'sitofp_i32_double'
|
; SSE2-LABEL: 'sitofp_i32_double'
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; SSE42-LABEL: 'sitofp_i32_double'
|
; SSE42-LABEL: 'sitofp_i32_double'
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: 'sitofp_i32_double'
|
; AVX1-LABEL: 'sitofp_i32_double'
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: 'sitofp_i32_double'
|
; AVX2-LABEL: 'sitofp_i32_double'
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: 'sitofp_i32_double'
|
; AVX512-LABEL: 'sitofp_i32_double'
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
|
@ -114,21 +114,21 @@ define i32 @sitofp_i32_double() {
|
||||||
define i32 @sitofp_i64_double() {
|
define i32 @sitofp_i64_double() {
|
||||||
; SSE-LABEL: 'sitofp_i64_double'
|
; SSE-LABEL: 'sitofp_i64_double'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
|
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'sitofp_i64_double'
|
; AVX-LABEL: 'sitofp_i64_double'
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
|
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
|
; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
|
||||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: 'sitofp_i64_double'
|
; AVX512F-LABEL: 'sitofp_i64_double'
|
||||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
|
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
|
||||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
|
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
|
||||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
|
; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
|
||||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
|
; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
|
||||||
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
|
@ -150,10 +150,10 @@ define i32 @sitofp_i64_double() {
|
||||||
define i32 @sitofp_i8_float() {
|
define i32 @sitofp_i8_float() {
|
||||||
; SSE-LABEL: 'sitofp_i8_float'
|
; SSE-LABEL: 'sitofp_i8_float'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
|
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'sitofp_i8_float'
|
; AVX-LABEL: 'sitofp_i8_float'
|
||||||
|
@ -183,10 +183,10 @@ define i32 @sitofp_i8_float() {
|
||||||
define i32 @sitofp_i16_float() {
|
define i32 @sitofp_i16_float() {
|
||||||
; SSE-LABEL: 'sitofp_i16_float'
|
; SSE-LABEL: 'sitofp_i16_float'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
|
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'sitofp_i16_float'
|
; AVX-LABEL: 'sitofp_i16_float'
|
||||||
|
@ -216,10 +216,10 @@ define i32 @sitofp_i16_float() {
|
||||||
define i32 @sitofp_i32_float() {
|
define i32 @sitofp_i32_float() {
|
||||||
; SSE2-LABEL: 'sitofp_i32_float'
|
; SSE2-LABEL: 'sitofp_i32_float'
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; SSE42-LABEL: 'sitofp_i32_float'
|
; SSE42-LABEL: 'sitofp_i32_float'
|
||||||
|
@ -232,7 +232,7 @@ define i32 @sitofp_i32_float() {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: 'sitofp_i32_float'
|
; AVX1-LABEL: 'sitofp_i32_float'
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
||||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
||||||
|
@ -240,7 +240,7 @@ define i32 @sitofp_i32_float() {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: 'sitofp_i32_float'
|
; AVX2-LABEL: 'sitofp_i32_float'
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
||||||
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
||||||
|
@ -248,7 +248,7 @@ define i32 @sitofp_i32_float() {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: 'sitofp_i32_float'
|
; AVX512-LABEL: 'sitofp_i32_float'
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
||||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
|
||||||
|
@ -265,10 +265,10 @@ define i32 @sitofp_i32_float() {
|
||||||
define i32 @sitofp_i64_float() {
|
define i32 @sitofp_i64_float() {
|
||||||
; SSE-LABEL: 'sitofp_i64_float'
|
; SSE-LABEL: 'sitofp_i64_float'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
|
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'sitofp_i64_float'
|
; AVX-LABEL: 'sitofp_i64_float'
|
||||||
|
|
|
@ -13,9 +13,9 @@
|
||||||
define i32 @uitofp_i8_double() {
|
define i32 @uitofp_i8_double() {
|
||||||
; SSE-LABEL: 'uitofp_i8_double'
|
; SSE-LABEL: 'uitofp_i8_double'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f64 = uitofp i8 undef to double
|
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f64 = uitofp i8 undef to double
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'uitofp_i8_double'
|
; AVX-LABEL: 'uitofp_i8_double'
|
||||||
|
@ -42,9 +42,9 @@ define i32 @uitofp_i8_double() {
|
||||||
define i32 @uitofp_i16_double() {
|
define i32 @uitofp_i16_double() {
|
||||||
; SSE-LABEL: 'uitofp_i16_double'
|
; SSE-LABEL: 'uitofp_i16_double'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f64 = uitofp i16 undef to double
|
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f64 = uitofp i16 undef to double
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'uitofp_i16_double'
|
; AVX-LABEL: 'uitofp_i16_double'
|
||||||
|
@ -71,9 +71,9 @@ define i32 @uitofp_i16_double() {
|
||||||
define i32 @uitofp_i32_double() {
|
define i32 @uitofp_i32_double() {
|
||||||
; SSE-LABEL: 'uitofp_i32_double'
|
; SSE-LABEL: 'uitofp_i32_double'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double
|
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: 'uitofp_i32_double'
|
; AVX1-LABEL: 'uitofp_i32_double'
|
||||||
|
@ -106,17 +106,17 @@ define i32 @uitofp_i32_double() {
|
||||||
|
|
||||||
define i32 @uitofp_i64_double() {
|
define i32 @uitofp_i64_double() {
|
||||||
; SSE2-LABEL: 'uitofp_i64_double'
|
; SSE2-LABEL: 'uitofp_i64_double'
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_i64_f64 = uitofp i64 undef to double
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cvt_i64_f64 = uitofp i64 undef to double
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; SSE42-LABEL: 'uitofp_i64_double'
|
; SSE42-LABEL: 'uitofp_i64_double'
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f64 = uitofp i64 undef to double
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f64 = uitofp i64 undef to double
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'uitofp_i64_double'
|
; AVX-LABEL: 'uitofp_i64_double'
|
||||||
|
@ -150,10 +150,10 @@ define i32 @uitofp_i64_double() {
|
||||||
define i32 @uitofp_i8_float() {
|
define i32 @uitofp_i8_float() {
|
||||||
; SSE-LABEL: 'uitofp_i8_float'
|
; SSE-LABEL: 'uitofp_i8_float'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = uitofp i8 undef to float
|
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = uitofp i8 undef to float
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'uitofp_i8_float'
|
; AVX-LABEL: 'uitofp_i8_float'
|
||||||
|
@ -183,10 +183,10 @@ define i32 @uitofp_i8_float() {
|
||||||
define i32 @uitofp_i16_float() {
|
define i32 @uitofp_i16_float() {
|
||||||
; SSE-LABEL: 'uitofp_i16_float'
|
; SSE-LABEL: 'uitofp_i16_float'
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = uitofp i16 undef to float
|
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = uitofp i16 undef to float
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
|
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
|
||||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'uitofp_i16_float'
|
; AVX-LABEL: 'uitofp_i16_float'
|
||||||
|
@ -256,19 +256,19 @@ define i32 @uitofp_i32_float() {
|
||||||
|
|
||||||
define i32 @uitofp_i64_float() {
|
define i32 @uitofp_i64_float() {
|
||||||
; SSE2-LABEL: 'uitofp_i64_float'
|
; SSE2-LABEL: 'uitofp_i64_float'
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_i64_f32 = uitofp i64 undef to float
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_i64_f32 = uitofp i64 undef to float
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
|
||||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; SSE42-LABEL: 'uitofp_i64_float'
|
; SSE42-LABEL: 'uitofp_i64_float'
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f32 = uitofp i64 undef to float
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f32 = uitofp i64 undef to float
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
|
||||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||||
;
|
;
|
||||||
; AVX-LABEL: 'uitofp_i64_float'
|
; AVX-LABEL: 'uitofp_i64_float'
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
|
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
|
||||||
|
|
||||||
define <8 x float> @sitofp_uitofp(<8 x i32> %a) {
|
define <8 x float> @sitofp_uitofp(<8 x i32> %a) {
|
||||||
; CHECK-LABEL: @sitofp_uitofp(
|
; CHECK-LABEL: @sitofp_uitofp(
|
||||||
|
@ -161,43 +161,12 @@ define <8 x i32> @sext_zext(<8 x i16> %a) {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) {
|
define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) {
|
||||||
; SSE-LABEL: @sitofp_4i32_8i16(
|
; CHECK-LABEL: @sitofp_4i32_8i16(
|
||||||
; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
|
; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
||||||
; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
|
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
; SSE-NEXT: [[B2:%.*]] = extractelement <8 x i16> [[B]], i32 2
|
; CHECK-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
|
||||||
; SSE-NEXT: [[B3:%.*]] = extractelement <8 x i16> [[B]], i32 3
|
; CHECK-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
; SSE-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
; CHECK-NEXT: ret <8 x float> [[R72]]
|
||||||
; SSE-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
|
|
||||||
; SSE-NEXT: [[AB5:%.*]] = sitofp i16 [[B1]] to float
|
|
||||||
; SSE-NEXT: [[AB6:%.*]] = sitofp i16 [[B2]] to float
|
|
||||||
; SSE-NEXT: [[AB7:%.*]] = sitofp i16 [[B3]] to float
|
|
||||||
; SSE-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
||||||
; SSE-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4
|
|
||||||
; SSE-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
|
|
||||||
; SSE-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
|
|
||||||
; SSE-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
|
|
||||||
; SSE-NEXT: ret <8 x float> [[R7]]
|
|
||||||
;
|
|
||||||
; SLM-LABEL: @sitofp_4i32_8i16(
|
|
||||||
; SLM-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
|
||||||
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
||||||
; SLM-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
|
|
||||||
; SLM-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
||||||
; SLM-NEXT: ret <8 x float> [[R72]]
|
|
||||||
;
|
|
||||||
; AVX-LABEL: @sitofp_4i32_8i16(
|
|
||||||
; AVX-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
|
||||||
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
||||||
; AVX-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
|
|
||||||
; AVX-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
||||||
; AVX-NEXT: ret <8 x float> [[R72]]
|
|
||||||
;
|
|
||||||
; AVX512-LABEL: @sitofp_4i32_8i16(
|
|
||||||
; AVX512-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
|
||||||
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
||||||
; AVX512-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
|
|
||||||
; AVX512-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
||||||
; AVX512-NEXT: ret <8 x float> [[R72]]
|
|
||||||
;
|
;
|
||||||
%a0 = extractelement <4 x i32> %a, i32 0
|
%a0 = extractelement <4 x i32> %a, i32 0
|
||||||
%a1 = extractelement <4 x i32> %a, i32 1
|
%a1 = extractelement <4 x i32> %a, i32 1
|
||||||
|
@ -228,24 +197,81 @@ define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) {
|
||||||
|
|
||||||
; Inspired by PR38154
|
; Inspired by PR38154
|
||||||
define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 x i8> %c) {
|
define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 x i8> %c) {
|
||||||
; CHECK-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
|
; SSE-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
|
||||||
; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
|
; SSE-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
||||||
; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
|
; SSE-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
|
||||||
; CHECK-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0
|
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||||
; CHECK-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1
|
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <2 x i32> <i32 0, i32 1>
|
||||||
; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
; SSE-NEXT: [[TMP5:%.*]] = sitofp <2 x i16> [[TMP4]] to <2 x float>
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
|
; SSE-NEXT: [[TMP6:%.*]] = uitofp <2 x i16> [[TMP4]] to <2 x float>
|
||||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> <i32 0, i32 3>
|
||||||
; CHECK-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
|
; SSE-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> <i32 0, i32 1>
|
||||||
; CHECK-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
|
; SSE-NEXT: [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float>
|
||||||
; CHECK-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float
|
; SSE-NEXT: [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float>
|
||||||
; CHECK-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float
|
; SSE-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> <i32 0, i32 3>
|
||||||
; CHECK-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
; SSE-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4
|
; SSE-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
|
; SSE-NEXT: [[R53:%.*]] = shufflevector <8 x float> [[R31]], <8 x float> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
|
||||||
; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
|
; SSE-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
|
; SSE-NEXT: [[R72:%.*]] = shufflevector <8 x float> [[R53]], <8 x float> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
|
||||||
; CHECK-NEXT: ret <8 x float> [[R7]]
|
; SSE-NEXT: ret <8 x float> [[R72]]
|
||||||
|
;
|
||||||
|
; SLM-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
|
||||||
|
; SLM-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
|
||||||
|
; SLM-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
|
||||||
|
; SLM-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
||||||
|
; SLM-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
|
||||||
|
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||||
|
; SLM-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
|
||||||
|
; SLM-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
|
||||||
|
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> <i32 0, i32 1>
|
||||||
|
; SLM-NEXT: [[TMP5:%.*]] = sitofp <2 x i8> [[TMP4]] to <2 x float>
|
||||||
|
; SLM-NEXT: [[TMP6:%.*]] = uitofp <2 x i8> [[TMP4]] to <2 x float>
|
||||||
|
; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> <i32 0, i32 3>
|
||||||
|
; SLM-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4
|
||||||
|
; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
|
||||||
|
; SLM-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
; SLM-NEXT: [[R72:%.*]] = shufflevector <8 x float> [[R5]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
|
||||||
|
; SLM-NEXT: ret <8 x float> [[R72]]
|
||||||
|
;
|
||||||
|
; AVX-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
|
||||||
|
; AVX-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
|
||||||
|
; AVX-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
|
||||||
|
; AVX-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0
|
||||||
|
; AVX-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1
|
||||||
|
; AVX-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
||||||
|
; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
|
||||||
|
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||||
|
; AVX-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
|
||||||
|
; AVX-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
|
||||||
|
; AVX-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float
|
||||||
|
; AVX-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float
|
||||||
|
; AVX-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
; AVX-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4
|
||||||
|
; AVX-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
|
||||||
|
; AVX-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
|
||||||
|
; AVX-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
|
||||||
|
; AVX-NEXT: ret <8 x float> [[R7]]
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
|
||||||
|
; AVX512-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
|
||||||
|
; AVX512-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
|
||||||
|
; AVX512-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0
|
||||||
|
; AVX512-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1
|
||||||
|
; AVX512-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
||||||
|
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
|
||||||
|
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||||
|
; AVX512-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
|
||||||
|
; AVX512-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
|
||||||
|
; AVX512-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float
|
||||||
|
; AVX512-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float
|
||||||
|
; AVX512-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
; AVX512-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4
|
||||||
|
; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
|
||||||
|
; AVX512-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
|
||||||
|
; AVX512-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
|
||||||
|
; AVX512-NEXT: ret <8 x float> [[R7]]
|
||||||
;
|
;
|
||||||
%a0 = extractelement <4 x i32> %a, i32 0
|
%a0 = extractelement <4 x i32> %a, i32 0
|
||||||
%a1 = extractelement <4 x i32> %a, i32 1
|
%a1 = extractelement <4 x i32> %a, i32 1
|
||||||
|
|
|
@ -161,43 +161,12 @@ define <8 x i32> @sext_zext(<8 x i16> %a) {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) {
|
define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) {
|
||||||
; SSE-LABEL: @sitofp_4i32_8i16(
|
; CHECK-LABEL: @sitofp_4i32_8i16(
|
||||||
; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
|
; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
||||||
; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
|
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
; SSE-NEXT: [[B2:%.*]] = extractelement <8 x i16> [[B]], i32 2
|
; CHECK-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
|
||||||
; SSE-NEXT: [[B3:%.*]] = extractelement <8 x i16> [[B]], i32 3
|
; CHECK-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
; SSE-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
; CHECK-NEXT: ret <8 x float> [[R72]]
|
||||||
; SSE-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
|
|
||||||
; SSE-NEXT: [[AB5:%.*]] = sitofp i16 [[B1]] to float
|
|
||||||
; SSE-NEXT: [[AB6:%.*]] = sitofp i16 [[B2]] to float
|
|
||||||
; SSE-NEXT: [[AB7:%.*]] = sitofp i16 [[B3]] to float
|
|
||||||
; SSE-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
||||||
; SSE-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4
|
|
||||||
; SSE-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
|
|
||||||
; SSE-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
|
|
||||||
; SSE-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
|
|
||||||
; SSE-NEXT: ret <8 x float> [[R7]]
|
|
||||||
;
|
|
||||||
; SLM-LABEL: @sitofp_4i32_8i16(
|
|
||||||
; SLM-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
|
||||||
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
||||||
; SLM-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
|
|
||||||
; SLM-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
||||||
; SLM-NEXT: ret <8 x float> [[R72]]
|
|
||||||
;
|
|
||||||
; AVX-LABEL: @sitofp_4i32_8i16(
|
|
||||||
; AVX-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
|
||||||
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
||||||
; AVX-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
|
|
||||||
; AVX-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
||||||
; AVX-NEXT: ret <8 x float> [[R72]]
|
|
||||||
;
|
|
||||||
; AVX512-LABEL: @sitofp_4i32_8i16(
|
|
||||||
; AVX512-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
|
||||||
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
||||||
; AVX512-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
|
|
||||||
; AVX512-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
||||||
; AVX512-NEXT: ret <8 x float> [[R72]]
|
|
||||||
;
|
;
|
||||||
%a0 = extractelement <4 x i32> %a, i32 0
|
%a0 = extractelement <4 x i32> %a, i32 0
|
||||||
%a1 = extractelement <4 x i32> %a, i32 1
|
%a1 = extractelement <4 x i32> %a, i32 1
|
||||||
|
@ -228,24 +197,81 @@ define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) {
|
||||||
|
|
||||||
; Inspired by PR38154
|
; Inspired by PR38154
|
||||||
define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 x i8> %c) {
|
define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 x i8> %c) {
|
||||||
; CHECK-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
|
; SSE-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
|
||||||
; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
|
; SSE-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
||||||
; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
|
; SSE-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
|
||||||
; CHECK-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0
|
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||||
; CHECK-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1
|
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <2 x i32> <i32 0, i32 1>
|
||||||
; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
; SSE-NEXT: [[TMP5:%.*]] = sitofp <2 x i16> [[TMP4]] to <2 x float>
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
|
; SSE-NEXT: [[TMP6:%.*]] = uitofp <2 x i16> [[TMP4]] to <2 x float>
|
||||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> <i32 0, i32 3>
|
||||||
; CHECK-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
|
; SSE-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> <i32 0, i32 1>
|
||||||
; CHECK-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
|
; SSE-NEXT: [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float>
|
||||||
; CHECK-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float
|
; SSE-NEXT: [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float>
|
||||||
; CHECK-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float
|
; SSE-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> <i32 0, i32 3>
|
||||||
; CHECK-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
; SSE-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4
|
; SSE-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
|
; SSE-NEXT: [[R53:%.*]] = shufflevector <8 x float> [[R31]], <8 x float> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
|
||||||
; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
|
; SSE-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
|
; SSE-NEXT: [[R72:%.*]] = shufflevector <8 x float> [[R53]], <8 x float> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
|
||||||
; CHECK-NEXT: ret <8 x float> [[R7]]
|
; SSE-NEXT: ret <8 x float> [[R72]]
|
||||||
|
;
|
||||||
|
; SLM-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
|
||||||
|
; SLM-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
|
||||||
|
; SLM-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
|
||||||
|
; SLM-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
||||||
|
; SLM-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
|
||||||
|
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||||
|
; SLM-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
|
||||||
|
; SLM-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
|
||||||
|
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> <i32 0, i32 1>
|
||||||
|
; SLM-NEXT: [[TMP5:%.*]] = sitofp <2 x i8> [[TMP4]] to <2 x float>
|
||||||
|
; SLM-NEXT: [[TMP6:%.*]] = uitofp <2 x i8> [[TMP4]] to <2 x float>
|
||||||
|
; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> <i32 0, i32 3>
|
||||||
|
; SLM-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4
|
||||||
|
; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
|
||||||
|
; SLM-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
; SLM-NEXT: [[R72:%.*]] = shufflevector <8 x float> [[R5]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
|
||||||
|
; SLM-NEXT: ret <8 x float> [[R72]]
|
||||||
|
;
|
||||||
|
; AVX-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
|
||||||
|
; AVX-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
|
||||||
|
; AVX-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
|
||||||
|
; AVX-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0
|
||||||
|
; AVX-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1
|
||||||
|
; AVX-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
||||||
|
; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
|
||||||
|
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||||
|
; AVX-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
|
||||||
|
; AVX-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
|
||||||
|
; AVX-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float
|
||||||
|
; AVX-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float
|
||||||
|
; AVX-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
; AVX-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4
|
||||||
|
; AVX-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
|
||||||
|
; AVX-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
|
||||||
|
; AVX-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
|
||||||
|
; AVX-NEXT: ret <8 x float> [[R7]]
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
|
||||||
|
; AVX512-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0
|
||||||
|
; AVX512-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
|
||||||
|
; AVX512-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0
|
||||||
|
; AVX512-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1
|
||||||
|
; AVX512-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
|
||||||
|
; AVX512-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
|
||||||
|
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||||
|
; AVX512-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
|
||||||
|
; AVX512-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
|
||||||
|
; AVX512-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float
|
||||||
|
; AVX512-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float
|
||||||
|
; AVX512-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
; AVX512-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4
|
||||||
|
; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
|
||||||
|
; AVX512-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
|
||||||
|
; AVX512-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
|
||||||
|
; AVX512-NEXT: ret <8 x float> [[R7]]
|
||||||
;
|
;
|
||||||
%a0 = extractelement <4 x i32> %a, i32 0
|
%a0 = extractelement <4 x i32> %a, i32 0
|
||||||
%a1 = extractelement <4 x i32> %a, i32 1
|
%a1 = extractelement <4 x i32> %a, i32 1
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
|
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256DQ
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256DQ
|
||||||
|
|
||||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
|
|
||||||
|
@ -216,20 +216,14 @@ define void @sitofp_8i64_8f64() #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sitofp_2i32_2f64() #0 {
|
define void @sitofp_2i32_2f64() #0 {
|
||||||
; SSE-LABEL: @sitofp_2i32_2f64(
|
; CHECK-LABEL: @sitofp_2i32_2f64(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
||||||
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||||
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||||
; SSE-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
|
||||||
; AVX-LABEL: @sitofp_2i32_2f64(
|
|
||||||
; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
|
||||||
; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
|
||||||
; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
||||||
; AVX-NEXT: ret void
|
|
||||||
;
|
;
|
||||||
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||||
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||||
|
@ -922,26 +916,11 @@ define void @sitofp_16i32_16f32() #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sitofp_4i16_4f32() #0 {
|
define void @sitofp_4i16_4f32() #0 {
|
||||||
; SSE-LABEL: @sitofp_4i16_4f32(
|
; CHECK-LABEL: @sitofp_4i16_4f32(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
; CHECK-NEXT: ret void
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float
|
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float
|
|
||||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float
|
|
||||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float
|
|
||||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; SSE-NEXT: ret void
|
|
||||||
;
|
|
||||||
; AVX-LABEL: @sitofp_4i16_4f32(
|
|
||||||
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
|
||||||
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
|
|
||||||
; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
||||||
; AVX-NEXT: ret void
|
|
||||||
;
|
;
|
||||||
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
||||||
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
||||||
|
@ -960,30 +939,12 @@ define void @sitofp_4i16_4f32() #0 {
|
||||||
|
|
||||||
define void @sitofp_8i16_8f32() #0 {
|
define void @sitofp_8i16_8f32() #0 {
|
||||||
; SSE-LABEL: @sitofp_8i16_8f32(
|
; SSE-LABEL: @sitofp_8i16_8f32(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
|
||||||
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
; SSE-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
|
; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
|
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
||||||
; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
|
|
||||||
; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
|
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float
|
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float
|
|
||||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float
|
|
||||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float
|
|
||||||
; SSE-NEXT: [[CVT4:%.*]] = sitofp i16 [[LD4]] to float
|
|
||||||
; SSE-NEXT: [[CVT5:%.*]] = sitofp i16 [[LD5]] to float
|
|
||||||
; SSE-NEXT: [[CVT6:%.*]] = sitofp i16 [[LD6]] to float
|
|
||||||
; SSE-NEXT: [[CVT7:%.*]] = sitofp i16 [[LD7]] to float
|
|
||||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
||||||
; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
||||||
; SSE-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
;
|
;
|
||||||
; AVX-LABEL: @sitofp_8i16_8f32(
|
; AVX-LABEL: @sitofp_8i16_8f32(
|
||||||
|
@ -1021,54 +982,18 @@ define void @sitofp_8i16_8f32() #0 {
|
||||||
|
|
||||||
define void @sitofp_16i16_16f32() #0 {
|
define void @sitofp_16i16_16f32() #0 {
|
||||||
; SSE-LABEL: @sitofp_16i16_16f32(
|
; SSE-LABEL: @sitofp_16i16_16f32(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
|
||||||
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <4 x i16>*), align 16
|
||||||
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
; SSE-NEXT: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12) to <4 x i16>*), align 8
|
||||||
; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
|
; SSE-NEXT: [[TMP5:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
|
; SSE-NEXT: [[TMP6:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
|
; SSE-NEXT: [[TMP7:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
|
; SSE-NEXT: [[TMP8:%.*]] = sitofp <4 x i16> [[TMP4]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8), align 16
|
; SSE-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; SSE-NEXT: [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9), align 2
|
; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
||||||
; SSE-NEXT: [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
|
; SSE-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
|
||||||
; SSE-NEXT: [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
|
; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
|
||||||
; SSE-NEXT: [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
|
|
||||||
; SSE-NEXT: [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
|
|
||||||
; SSE-NEXT: [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
|
|
||||||
; SSE-NEXT: [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
|
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float
|
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float
|
|
||||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float
|
|
||||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float
|
|
||||||
; SSE-NEXT: [[CVT4:%.*]] = sitofp i16 [[LD4]] to float
|
|
||||||
; SSE-NEXT: [[CVT5:%.*]] = sitofp i16 [[LD5]] to float
|
|
||||||
; SSE-NEXT: [[CVT6:%.*]] = sitofp i16 [[LD6]] to float
|
|
||||||
; SSE-NEXT: [[CVT7:%.*]] = sitofp i16 [[LD7]] to float
|
|
||||||
; SSE-NEXT: [[CVT8:%.*]] = sitofp i16 [[LD8]] to float
|
|
||||||
; SSE-NEXT: [[CVT9:%.*]] = sitofp i16 [[LD9]] to float
|
|
||||||
; SSE-NEXT: [[CVT10:%.*]] = sitofp i16 [[LD10]] to float
|
|
||||||
; SSE-NEXT: [[CVT11:%.*]] = sitofp i16 [[LD11]] to float
|
|
||||||
; SSE-NEXT: [[CVT12:%.*]] = sitofp i16 [[LD12]] to float
|
|
||||||
; SSE-NEXT: [[CVT13:%.*]] = sitofp i16 [[LD13]] to float
|
|
||||||
; SSE-NEXT: [[CVT14:%.*]] = sitofp i16 [[LD14]] to float
|
|
||||||
; SSE-NEXT: [[CVT15:%.*]] = sitofp i16 [[LD15]] to float
|
|
||||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
||||||
; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 32
|
|
||||||
; SSE-NEXT: store float [[CVT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
|
|
||||||
; SSE-NEXT: store float [[CVT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
|
|
||||||
; SSE-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
;
|
;
|
||||||
; AVX256-LABEL: @sitofp_16i16_16f32(
|
; AVX256-LABEL: @sitofp_16i16_16f32(
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
|
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
|
||||||
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256DQ
|
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256DQ
|
||||||
|
|
||||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
|
|
||||||
|
@ -216,20 +216,14 @@ define void @sitofp_8i64_8f64() #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sitofp_2i32_2f64() #0 {
|
define void @sitofp_2i32_2f64() #0 {
|
||||||
; SSE-LABEL: @sitofp_2i32_2f64(
|
; CHECK-LABEL: @sitofp_2i32_2f64(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
||||||
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||||
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||||
; SSE-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
;
|
|
||||||
; AVX-LABEL: @sitofp_2i32_2f64(
|
|
||||||
; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
|
||||||
; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
|
||||||
; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
|
||||||
; AVX-NEXT: ret void
|
|
||||||
;
|
;
|
||||||
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||||
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||||
|
@ -922,26 +916,11 @@ define void @sitofp_16i32_16f32() #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sitofp_4i16_4f32() #0 {
|
define void @sitofp_4i16_4f32() #0 {
|
||||||
; SSE-LABEL: @sitofp_4i16_4f32(
|
; CHECK-LABEL: @sitofp_4i16_4f32(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
; CHECK-NEXT: ret void
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float
|
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float
|
|
||||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float
|
|
||||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float
|
|
||||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; SSE-NEXT: ret void
|
|
||||||
;
|
|
||||||
; AVX-LABEL: @sitofp_4i16_4f32(
|
|
||||||
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
|
||||||
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
|
|
||||||
; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
||||||
; AVX-NEXT: ret void
|
|
||||||
;
|
;
|
||||||
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
||||||
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
||||||
|
@ -960,30 +939,12 @@ define void @sitofp_4i16_4f32() #0 {
|
||||||
|
|
||||||
define void @sitofp_8i16_8f32() #0 {
|
define void @sitofp_8i16_8f32() #0 {
|
||||||
; SSE-LABEL: @sitofp_8i16_8f32(
|
; SSE-LABEL: @sitofp_8i16_8f32(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
|
||||||
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
; SSE-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
|
; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
|
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
||||||
; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
|
|
||||||
; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
|
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float
|
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float
|
|
||||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float
|
|
||||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float
|
|
||||||
; SSE-NEXT: [[CVT4:%.*]] = sitofp i16 [[LD4]] to float
|
|
||||||
; SSE-NEXT: [[CVT5:%.*]] = sitofp i16 [[LD5]] to float
|
|
||||||
; SSE-NEXT: [[CVT6:%.*]] = sitofp i16 [[LD6]] to float
|
|
||||||
; SSE-NEXT: [[CVT7:%.*]] = sitofp i16 [[LD7]] to float
|
|
||||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
||||||
; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
||||||
; SSE-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
;
|
;
|
||||||
; AVX-LABEL: @sitofp_8i16_8f32(
|
; AVX-LABEL: @sitofp_8i16_8f32(
|
||||||
|
@ -1021,54 +982,18 @@ define void @sitofp_8i16_8f32() #0 {
|
||||||
|
|
||||||
define void @sitofp_16i16_16f32() #0 {
|
define void @sitofp_16i16_16f32() #0 {
|
||||||
; SSE-LABEL: @sitofp_16i16_16f32(
|
; SSE-LABEL: @sitofp_16i16_16f32(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
|
||||||
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <4 x i16>*), align 16
|
||||||
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
; SSE-NEXT: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12) to <4 x i16>*), align 8
|
||||||
; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
|
; SSE-NEXT: [[TMP5:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
|
; SSE-NEXT: [[TMP6:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
|
; SSE-NEXT: [[TMP7:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
|
; SSE-NEXT: [[TMP8:%.*]] = sitofp <4 x i16> [[TMP4]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8), align 16
|
; SSE-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; SSE-NEXT: [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9), align 2
|
; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
||||||
; SSE-NEXT: [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
|
; SSE-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
|
||||||
; SSE-NEXT: [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
|
; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
|
||||||
; SSE-NEXT: [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
|
|
||||||
; SSE-NEXT: [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
|
|
||||||
; SSE-NEXT: [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
|
|
||||||
; SSE-NEXT: [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
|
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float
|
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float
|
|
||||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float
|
|
||||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float
|
|
||||||
; SSE-NEXT: [[CVT4:%.*]] = sitofp i16 [[LD4]] to float
|
|
||||||
; SSE-NEXT: [[CVT5:%.*]] = sitofp i16 [[LD5]] to float
|
|
||||||
; SSE-NEXT: [[CVT6:%.*]] = sitofp i16 [[LD6]] to float
|
|
||||||
; SSE-NEXT: [[CVT7:%.*]] = sitofp i16 [[LD7]] to float
|
|
||||||
; SSE-NEXT: [[CVT8:%.*]] = sitofp i16 [[LD8]] to float
|
|
||||||
; SSE-NEXT: [[CVT9:%.*]] = sitofp i16 [[LD9]] to float
|
|
||||||
; SSE-NEXT: [[CVT10:%.*]] = sitofp i16 [[LD10]] to float
|
|
||||||
; SSE-NEXT: [[CVT11:%.*]] = sitofp i16 [[LD11]] to float
|
|
||||||
; SSE-NEXT: [[CVT12:%.*]] = sitofp i16 [[LD12]] to float
|
|
||||||
; SSE-NEXT: [[CVT13:%.*]] = sitofp i16 [[LD13]] to float
|
|
||||||
; SSE-NEXT: [[CVT14:%.*]] = sitofp i16 [[LD14]] to float
|
|
||||||
; SSE-NEXT: [[CVT15:%.*]] = sitofp i16 [[LD15]] to float
|
|
||||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
||||||
; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 32
|
|
||||||
; SSE-NEXT: store float [[CVT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
|
|
||||||
; SSE-NEXT: store float [[CVT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
|
|
||||||
; SSE-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
;
|
;
|
||||||
; AVX256-LABEL: @sitofp_16i16_16f32(
|
; AVX256-LABEL: @sitofp_16i16_16f32(
|
||||||
|
|
|
@ -576,18 +576,9 @@ define void @uitofp_2i64_2f32() #0 {
|
||||||
|
|
||||||
define void @uitofp_4i64_4f32() #0 {
|
define void @uitofp_4i64_4f32() #0 {
|
||||||
; SSE-LABEL: @uitofp_4i64_4f32(
|
; SSE-LABEL: @uitofp_4i64_4f32(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
; SSE-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
; SSE-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
|
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
|
|
||||||
; SSE-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
|
|
||||||
; SSE-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
|
|
||||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; SSE-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
;
|
;
|
||||||
; AVX256NODQ-LABEL: @uitofp_4i64_4f32(
|
; AVX256NODQ-LABEL: @uitofp_4i64_4f32(
|
||||||
|
@ -634,30 +625,12 @@ define void @uitofp_4i64_4f32() #0 {
|
||||||
|
|
||||||
define void @uitofp_8i64_8f32() #0 {
|
define void @uitofp_8i64_8f32() #0 {
|
||||||
; SSE-LABEL: @uitofp_8i64_8f32(
|
; SSE-LABEL: @uitofp_8i64_8f32(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
|
||||||
; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
; SSE-NEXT: [[TMP3:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
; SSE-NEXT: [[TMP4:%.*]] = uitofp <4 x i64> [[TMP2]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
|
; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; SSE-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
|
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
||||||
; SSE-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
|
|
||||||
; SSE-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
|
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
|
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
|
|
||||||
; SSE-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
|
|
||||||
; SSE-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
|
|
||||||
; SSE-NEXT: [[CVT4:%.*]] = uitofp i64 [[LD4]] to float
|
|
||||||
; SSE-NEXT: [[CVT5:%.*]] = uitofp i64 [[LD5]] to float
|
|
||||||
; SSE-NEXT: [[CVT6:%.*]] = uitofp i64 [[LD6]] to float
|
|
||||||
; SSE-NEXT: [[CVT7:%.*]] = uitofp i64 [[LD7]] to float
|
|
||||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
||||||
; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
||||||
; SSE-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
;
|
;
|
||||||
; AVX256NODQ-LABEL: @uitofp_8i64_8f32(
|
; AVX256NODQ-LABEL: @uitofp_8i64_8f32(
|
||||||
|
@ -874,26 +847,11 @@ define void @uitofp_16i32_16f32() #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @uitofp_4i16_4f32() #0 {
|
define void @uitofp_4i16_4f32() #0 {
|
||||||
; SSE-LABEL: @uitofp_4i16_4f32(
|
; CHECK-LABEL: @uitofp_4i16_4f32(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
; CHECK-NEXT: ret void
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to float
|
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to float
|
|
||||||
; SSE-NEXT: [[CVT2:%.*]] = uitofp i16 [[LD2]] to float
|
|
||||||
; SSE-NEXT: [[CVT3:%.*]] = uitofp i16 [[LD3]] to float
|
|
||||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; SSE-NEXT: ret void
|
|
||||||
;
|
|
||||||
; AVX-LABEL: @uitofp_4i16_4f32(
|
|
||||||
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
|
||||||
; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
|
|
||||||
; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
|
||||||
; AVX-NEXT: ret void
|
|
||||||
;
|
;
|
||||||
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
||||||
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
||||||
|
@ -912,30 +870,12 @@ define void @uitofp_4i16_4f32() #0 {
|
||||||
|
|
||||||
define void @uitofp_8i16_8f32() #0 {
|
define void @uitofp_8i16_8f32() #0 {
|
||||||
; SSE-LABEL: @uitofp_8i16_8f32(
|
; SSE-LABEL: @uitofp_8i16_8f32(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
|
||||||
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
; SSE-NEXT: [[TMP3:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
; SSE-NEXT: [[TMP4:%.*]] = uitofp <4 x i16> [[TMP2]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
|
; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
|
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
||||||
; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
|
|
||||||
; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
|
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to float
|
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to float
|
|
||||||
; SSE-NEXT: [[CVT2:%.*]] = uitofp i16 [[LD2]] to float
|
|
||||||
; SSE-NEXT: [[CVT3:%.*]] = uitofp i16 [[LD3]] to float
|
|
||||||
; SSE-NEXT: [[CVT4:%.*]] = uitofp i16 [[LD4]] to float
|
|
||||||
; SSE-NEXT: [[CVT5:%.*]] = uitofp i16 [[LD5]] to float
|
|
||||||
; SSE-NEXT: [[CVT6:%.*]] = uitofp i16 [[LD6]] to float
|
|
||||||
; SSE-NEXT: [[CVT7:%.*]] = uitofp i16 [[LD7]] to float
|
|
||||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
||||||
; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
||||||
; SSE-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
;
|
;
|
||||||
; AVX-LABEL: @uitofp_8i16_8f32(
|
; AVX-LABEL: @uitofp_8i16_8f32(
|
||||||
|
@ -973,54 +913,18 @@ define void @uitofp_8i16_8f32() #0 {
|
||||||
|
|
||||||
define void @uitofp_16i16_16f32() #0 {
|
define void @uitofp_16i16_16f32() #0 {
|
||||||
; SSE-LABEL: @uitofp_16i16_16f32(
|
; SSE-LABEL: @uitofp_16i16_16f32(
|
||||||
; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
|
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
|
||||||
; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
|
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
|
||||||
; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
|
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <4 x i16>*), align 16
|
||||||
; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
|
; SSE-NEXT: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12) to <4 x i16>*), align 8
|
||||||
; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
|
; SSE-NEXT: [[TMP5:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
|
; SSE-NEXT: [[TMP6:%.*]] = uitofp <4 x i16> [[TMP2]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
|
; SSE-NEXT: [[TMP7:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
|
; SSE-NEXT: [[TMP8:%.*]] = uitofp <4 x i16> [[TMP4]] to <4 x float>
|
||||||
; SSE-NEXT: [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8), align 16
|
; SSE-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||||
; SSE-NEXT: [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9), align 2
|
; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
||||||
; SSE-NEXT: [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
|
; SSE-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
|
||||||
; SSE-NEXT: [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
|
; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
|
||||||
; SSE-NEXT: [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
|
|
||||||
; SSE-NEXT: [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
|
|
||||||
; SSE-NEXT: [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
|
|
||||||
; SSE-NEXT: [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
|
|
||||||
; SSE-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to float
|
|
||||||
; SSE-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to float
|
|
||||||
; SSE-NEXT: [[CVT2:%.*]] = uitofp i16 [[LD2]] to float
|
|
||||||
; SSE-NEXT: [[CVT3:%.*]] = uitofp i16 [[LD3]] to float
|
|
||||||
; SSE-NEXT: [[CVT4:%.*]] = uitofp i16 [[LD4]] to float
|
|
||||||
; SSE-NEXT: [[CVT5:%.*]] = uitofp i16 [[LD5]] to float
|
|
||||||
; SSE-NEXT: [[CVT6:%.*]] = uitofp i16 [[LD6]] to float
|
|
||||||
; SSE-NEXT: [[CVT7:%.*]] = uitofp i16 [[LD7]] to float
|
|
||||||
; SSE-NEXT: [[CVT8:%.*]] = uitofp i16 [[LD8]] to float
|
|
||||||
; SSE-NEXT: [[CVT9:%.*]] = uitofp i16 [[LD9]] to float
|
|
||||||
; SSE-NEXT: [[CVT10:%.*]] = uitofp i16 [[LD10]] to float
|
|
||||||
; SSE-NEXT: [[CVT11:%.*]] = uitofp i16 [[LD11]] to float
|
|
||||||
; SSE-NEXT: [[CVT12:%.*]] = uitofp i16 [[LD12]] to float
|
|
||||||
; SSE-NEXT: [[CVT13:%.*]] = uitofp i16 [[LD13]] to float
|
|
||||||
; SSE-NEXT: [[CVT14:%.*]] = uitofp i16 [[LD14]] to float
|
|
||||||
; SSE-NEXT: [[CVT15:%.*]] = uitofp i16 [[LD15]] to float
|
|
||||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
|
||||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
|
||||||
; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 32
|
|
||||||
; SSE-NEXT: store float [[CVT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
|
|
||||||
; SSE-NEXT: store float [[CVT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
|
|
||||||
; SSE-NEXT: store float [[CVT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
|
|
||||||
; SSE-NEXT: store float [[CVT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
|
|
||||||
; SSE-NEXT: ret void
|
; SSE-NEXT: ret void
|
||||||
;
|
;
|
||||||
; AVX256-LABEL: @uitofp_16i16_16f32(
|
; AVX256-LABEL: @uitofp_16i16_16f32(
|
||||||
|
|
Loading…
Reference in New Issue