forked from OSchip/llvm-project
[CostModel][X86] Adjust i32/i64 to f32/f64 scalar conversion costs based on llvm-mca reports (+ Agner).
Older SSE targets have slower gpr->fpu scalar conversions - we also need to account for uitofp i32 -> f32/f64 being lowered as sitofp i64 -> f32/f64
This commit is contained in:
parent
3d3c0ed932
commit
65e4240fa1
|
@ -1820,6 +1820,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
|||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, // vpternlogq
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, // vpternlogq+psrlq
|
||||
|
||||
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 },
|
||||
|
@ -1835,15 +1837,11 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
|||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 5 },
|
||||
|
||||
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
|
||||
|
||||
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 3 },
|
||||
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 3 },
|
||||
|
||||
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 },
|
||||
|
||||
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
|
||||
|
@ -2055,10 +2053,16 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
|||
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
|
||||
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // PSHUFB
|
||||
|
||||
{ ISD::SINT_TO_FP, MVT::f32, MVT::i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::f64, MVT::i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::f32, MVT::i64, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::f64, MVT::i64, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 },
|
||||
|
||||
{ ISD::UINT_TO_FP, MVT::f32, MVT::i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::f64, MVT::i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 },
|
||||
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 12 },
|
||||
|
@ -2077,6 +2081,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
|||
// These are somewhat magic numbers justified by comparing the
|
||||
// output of llvm-mca for our various supported scheduler models
|
||||
// and basing it off the worst case scenario.
|
||||
{ ISD::SINT_TO_FP, MVT::f32, MVT::i32, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::f64, MVT::i32, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::f32, MVT::i64, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::f64, MVT::i64, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 3 },
|
||||
|
@ -2086,6 +2094,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
|||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 8 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 8 },
|
||||
|
||||
{ ISD::UINT_TO_FP, MVT::f32, MVT::i32, 3 },
|
||||
{ ISD::UINT_TO_FP, MVT::f64, MVT::i32, 3 },
|
||||
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 8 },
|
||||
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 9 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 },
|
||||
|
|
|
@ -70,7 +70,7 @@ define i32 @sitofp_i16_double() {
|
|||
|
||||
define i32 @sitofp_i32_double() {
|
||||
; SSE2-LABEL: 'sitofp_i32_double'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||
|
@ -112,12 +112,19 @@ define i32 @sitofp_i32_double() {
|
|||
}
|
||||
|
||||
define i32 @sitofp_i64_double() {
|
||||
; SSE-LABEL: 'sitofp_i64_double'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
; SSE2-LABEL: 'sitofp_i64_double'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'sitofp_i64_double'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'sitofp_i64_double'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
|
||||
|
@ -215,7 +222,7 @@ define i32 @sitofp_i16_float() {
|
|||
|
||||
define i32 @sitofp_i32_float() {
|
||||
; SSE2-LABEL: 'sitofp_i32_float'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_i32_f32 = sitofp i32 undef to float
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
|
||||
|
@ -263,13 +270,21 @@ define i32 @sitofp_i32_float() {
|
|||
}
|
||||
|
||||
define i32 @sitofp_i64_float() {
|
||||
; SSE-LABEL: 'sitofp_i64_float'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
; SSE2-LABEL: 'sitofp_i64_float'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'sitofp_i64_float'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'sitofp_i64_float'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
|
||||
|
|
|
@ -69,12 +69,19 @@ define i32 @uitofp_i16_double() {
|
|||
}
|
||||
|
||||
define i32 @uitofp_i32_double() {
|
||||
; SSE-LABEL: 'uitofp_i32_double'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
; SSE2-LABEL: 'uitofp_i32_double'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_i32_f64 = uitofp i32 undef to double
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'uitofp_i32_double'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'uitofp_i32_double'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double
|
||||
|
@ -214,13 +221,21 @@ define i32 @uitofp_i16_float() {
|
|||
}
|
||||
|
||||
define i32 @uitofp_i32_float() {
|
||||
; SSE-LABEL: 'uitofp_i32_float'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
; SSE2-LABEL: 'uitofp_i32_float'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_i32_f32 = uitofp i32 undef to float
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; SSE42-LABEL: 'uitofp_i32_float'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX1-LABEL: 'uitofp_i32_float'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float
|
||||
|
|
|
@ -216,20 +216,11 @@ define void @sitofp_8i64_8f64() #0 {
|
|||
}
|
||||
|
||||
define void @sitofp_2i32_2f64() #0 {
|
||||
; SSE-LABEL: @sitofp_2i32_2f64(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
||||
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @sitofp_2i32_2f64(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||
; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||
; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||
; AVX-NEXT: ret void
|
||||
; CHECK-LABEL: @sitofp_2i32_2f64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||
; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||
|
@ -242,18 +233,12 @@ define void @sitofp_2i32_2f64() #0 {
|
|||
|
||||
define void @sitofp_4i32_4f64() #0 {
|
||||
; SSE-LABEL: @sitofp_4i32_4f64(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[LD2]] to double
|
||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[LD3]] to double
|
||||
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
||||
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
|
||||
; SSE-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
|
||||
; SSE-NEXT: store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||
; SSE-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @sitofp_4i32_4f64(
|
||||
|
@ -279,30 +264,18 @@ define void @sitofp_4i32_4f64() #0 {
|
|||
|
||||
define void @sitofp_8i32_8f64() #0 {
|
||||
; SSE-LABEL: @sitofp_8i32_8f64(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
|
||||
; SSE-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
|
||||
; SSE-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
|
||||
; SSE-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[LD2]] to double
|
||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[LD3]] to double
|
||||
; SSE-NEXT: [[CVT4:%.*]] = sitofp i32 [[LD4]] to double
|
||||
; SSE-NEXT: [[CVT5:%.*]] = sitofp i32 [[LD5]] to double
|
||||
; SSE-NEXT: [[CVT6:%.*]] = sitofp i32 [[LD6]] to double
|
||||
; SSE-NEXT: [[CVT7:%.*]] = sitofp i32 [[LD7]] to double
|
||||
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
||||
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
||||
; SSE-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
|
||||
; SSE-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
|
||||
; SSE-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
|
||||
; SSE-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
|
||||
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <2 x i32>*), align 16
|
||||
; SSE-NEXT: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6) to <2 x i32>*), align 8
|
||||
; SSE-NEXT: [[TMP5:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP7:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP8:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x double>
|
||||
; SSE-NEXT: store <2 x double> [[TMP5]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||
; SSE-NEXT: store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
|
||||
; SSE-NEXT: store <2 x double> [[TMP7]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
|
||||
; SSE-NEXT: store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX256-LABEL: @sitofp_8i32_8f64(
|
||||
|
@ -623,26 +596,11 @@ define void @sitofp_2i64_2f32() #0 {
|
|||
}
|
||||
|
||||
define void @sitofp_4i64_4f32() #0 {
|
||||
; SSE-LABEL: @sitofp_4i64_4f32(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
|
||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
|
||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
|
||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @sitofp_4i64_4f32(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
||||
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
||||
; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||
; AVX-NEXT: ret void
|
||||
; CHECK-LABEL: @sitofp_4i64_4f32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
||||
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||
|
@ -661,30 +619,12 @@ define void @sitofp_4i64_4f32() #0 {
|
|||
|
||||
define void @sitofp_8i64_8f32() #0 {
|
||||
; SSE-LABEL: @sitofp_8i64_8f32(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
||||
; SSE-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
|
||||
; SSE-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
|
||||
; SSE-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
|
||||
; SSE-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
|
||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
|
||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
|
||||
; SSE-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to float
|
||||
; SSE-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to float
|
||||
; SSE-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to float
|
||||
; SSE-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to float
|
||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
||||
; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
||||
; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
||||
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
|
||||
; SSE-NEXT: [[TMP3:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
||||
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i64> [[TMP2]] to <4 x float>
|
||||
; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @sitofp_8i64_8f32(
|
||||
|
@ -1167,15 +1107,17 @@ define void @sitofp_16i8_16f32() #0 {
|
|||
|
||||
define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
|
||||
; SSE-LABEL: @sitofp_4xi32_4f64(
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to double
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to double
|
||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to double
|
||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to double
|
||||
; SSE-NEXT: [[RES0:%.*]] = insertelement <4 x double> poison, double [[CVT0]], i32 0
|
||||
; SSE-NEXT: [[RES1:%.*]] = insertelement <4 x double> [[RES0]], double [[CVT1]], i32 1
|
||||
; SSE-NEXT: [[RES2:%.*]] = insertelement <4 x double> [[RES1]], double [[CVT2]], i32 2
|
||||
; SSE-NEXT: [[RES3:%.*]] = insertelement <4 x double> [[RES2]], double [[CVT3]], i32 3
|
||||
; SSE-NEXT: ret <4 x double> [[RES3]]
|
||||
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A0:%.*]], i32 0
|
||||
; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
|
||||
; SSE-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0
|
||||
; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[A3:%.*]], i32 1
|
||||
; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; SSE-NEXT: [[RES11:%.*]] = shufflevector <4 x double> poison, <4 x double> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; SSE-NEXT: [[RES32:%.*]] = shufflevector <4 x double> [[RES11]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
; SSE-NEXT: ret <4 x double> [[RES32]]
|
||||
;
|
||||
; AVX-LABEL: @sitofp_4xi32_4f64(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
|
||||
|
@ -1197,24 +1139,13 @@ define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
|
|||
}
|
||||
|
||||
define <4 x float> @sitofp_4xi32_4f32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
|
||||
; SSE-LABEL: @sitofp_4xi32_4f32(
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to float
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to float
|
||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to float
|
||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to float
|
||||
; SSE-NEXT: [[RES0:%.*]] = insertelement <4 x float> poison, float [[CVT0]], i32 0
|
||||
; SSE-NEXT: [[RES1:%.*]] = insertelement <4 x float> [[RES0]], float [[CVT1]], i32 1
|
||||
; SSE-NEXT: [[RES2:%.*]] = insertelement <4 x float> [[RES1]], float [[CVT2]], i32 2
|
||||
; SSE-NEXT: [[RES3:%.*]] = insertelement <4 x float> [[RES2]], float [[CVT3]], i32 3
|
||||
; SSE-NEXT: ret <4 x float> [[RES3]]
|
||||
;
|
||||
; AVX-LABEL: @sitofp_4xi32_4f32(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
|
||||
; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
|
||||
; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
|
||||
; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
|
||||
; AVX-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
|
||||
; AVX-NEXT: ret <4 x float> [[TMP5]]
|
||||
; CHECK-LABEL: @sitofp_4xi32_4f32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP5]]
|
||||
;
|
||||
%cvt0 = sitofp i32 %a0 to float
|
||||
%cvt1 = sitofp i32 %a1 to float
|
||||
|
|
|
@ -216,20 +216,11 @@ define void @sitofp_8i64_8f64() #0 {
|
|||
}
|
||||
|
||||
define void @sitofp_2i32_2f64() #0 {
|
||||
; SSE-LABEL: @sitofp_2i32_2f64(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
||||
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @sitofp_2i32_2f64(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||
; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||
; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||
; AVX-NEXT: ret void
|
||||
; CHECK-LABEL: @sitofp_2i32_2f64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||
; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||
|
@ -242,18 +233,12 @@ define void @sitofp_2i32_2f64() #0 {
|
|||
|
||||
define void @sitofp_4i32_4f64() #0 {
|
||||
; SSE-LABEL: @sitofp_4i32_4f64(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[LD2]] to double
|
||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[LD3]] to double
|
||||
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
||||
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
|
||||
; SSE-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
|
||||
; SSE-NEXT: store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||
; SSE-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @sitofp_4i32_4f64(
|
||||
|
@ -279,30 +264,18 @@ define void @sitofp_4i32_4f64() #0 {
|
|||
|
||||
define void @sitofp_8i32_8f64() #0 {
|
||||
; SSE-LABEL: @sitofp_8i32_8f64(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
|
||||
; SSE-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
|
||||
; SSE-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
|
||||
; SSE-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double
|
||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[LD2]] to double
|
||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[LD3]] to double
|
||||
; SSE-NEXT: [[CVT4:%.*]] = sitofp i32 [[LD4]] to double
|
||||
; SSE-NEXT: [[CVT5:%.*]] = sitofp i32 [[LD5]] to double
|
||||
; SSE-NEXT: [[CVT6:%.*]] = sitofp i32 [[LD6]] to double
|
||||
; SSE-NEXT: [[CVT7:%.*]] = sitofp i32 [[LD7]] to double
|
||||
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
||||
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
||||
; SSE-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
|
||||
; SSE-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
|
||||
; SSE-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
|
||||
; SSE-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
|
||||
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <2 x i32>*), align 16
|
||||
; SSE-NEXT: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6) to <2 x i32>*), align 8
|
||||
; SSE-NEXT: [[TMP5:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP7:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP8:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x double>
|
||||
; SSE-NEXT: store <2 x double> [[TMP5]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||
; SSE-NEXT: store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
|
||||
; SSE-NEXT: store <2 x double> [[TMP7]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
|
||||
; SSE-NEXT: store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX256-LABEL: @sitofp_8i32_8f64(
|
||||
|
@ -623,26 +596,11 @@ define void @sitofp_2i64_2f32() #0 {
|
|||
}
|
||||
|
||||
define void @sitofp_4i64_4f32() #0 {
|
||||
; SSE-LABEL: @sitofp_4i64_4f32(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
|
||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
|
||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
|
||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @sitofp_4i64_4f32(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
||||
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
||||
; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||
; AVX-NEXT: ret void
|
||||
; CHECK-LABEL: @sitofp_4i64_4f32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
||||
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||
|
@ -661,30 +619,12 @@ define void @sitofp_4i64_4f32() #0 {
|
|||
|
||||
define void @sitofp_8i64_8f32() #0 {
|
||||
; SSE-LABEL: @sitofp_8i64_8f32(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
|
||||
; SSE-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
|
||||
; SSE-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
|
||||
; SSE-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
|
||||
; SSE-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
|
||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float
|
||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float
|
||||
; SSE-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to float
|
||||
; SSE-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to float
|
||||
; SSE-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to float
|
||||
; SSE-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to float
|
||||
; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
|
||||
; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
|
||||
; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
|
||||
; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
|
||||
; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
|
||||
; SSE-NEXT: [[TMP3:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
|
||||
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i64> [[TMP2]] to <4 x float>
|
||||
; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
|
||||
; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @sitofp_8i64_8f32(
|
||||
|
@ -1167,15 +1107,17 @@ define void @sitofp_16i8_16f32() #0 {
|
|||
|
||||
define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
|
||||
; SSE-LABEL: @sitofp_4xi32_4f64(
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to double
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to double
|
||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to double
|
||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to double
|
||||
; SSE-NEXT: [[RES0:%.*]] = insertelement <4 x double> undef, double [[CVT0]], i32 0
|
||||
; SSE-NEXT: [[RES1:%.*]] = insertelement <4 x double> [[RES0]], double [[CVT1]], i32 1
|
||||
; SSE-NEXT: [[RES2:%.*]] = insertelement <4 x double> [[RES1]], double [[CVT2]], i32 2
|
||||
; SSE-NEXT: [[RES3:%.*]] = insertelement <4 x double> [[RES2]], double [[CVT3]], i32 3
|
||||
; SSE-NEXT: ret <4 x double> [[RES3]]
|
||||
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A0:%.*]], i32 0
|
||||
; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
|
||||
; SSE-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0
|
||||
; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[A3:%.*]], i32 1
|
||||
; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; SSE-NEXT: [[RES11:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; SSE-NEXT: [[RES32:%.*]] = shufflevector <4 x double> [[RES11]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
; SSE-NEXT: ret <4 x double> [[RES32]]
|
||||
;
|
||||
; AVX-LABEL: @sitofp_4xi32_4f64(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
|
||||
|
@ -1197,24 +1139,13 @@ define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
|
|||
}
|
||||
|
||||
define <4 x float> @sitofp_4xi32_4f32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
|
||||
; SSE-LABEL: @sitofp_4xi32_4f32(
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to float
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to float
|
||||
; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to float
|
||||
; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to float
|
||||
; SSE-NEXT: [[RES0:%.*]] = insertelement <4 x float> undef, float [[CVT0]], i32 0
|
||||
; SSE-NEXT: [[RES1:%.*]] = insertelement <4 x float> [[RES0]], float [[CVT1]], i32 1
|
||||
; SSE-NEXT: [[RES2:%.*]] = insertelement <4 x float> [[RES1]], float [[CVT2]], i32 2
|
||||
; SSE-NEXT: [[RES3:%.*]] = insertelement <4 x float> [[RES2]], float [[CVT3]], i32 3
|
||||
; SSE-NEXT: ret <4 x float> [[RES3]]
|
||||
;
|
||||
; AVX-LABEL: @sitofp_4xi32_4f32(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
|
||||
; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
|
||||
; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
|
||||
; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
|
||||
; AVX-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
|
||||
; AVX-NEXT: ret <4 x float> [[TMP5]]
|
||||
; CHECK-LABEL: @sitofp_4xi32_4f32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP5]]
|
||||
;
|
||||
%cvt0 = sitofp i32 %a0 to float
|
||||
%cvt1 = sitofp i32 %a1 to float
|
||||
|
|
|
@ -127,12 +127,9 @@ define void @uitofp_8i64_8f64() #0 {
|
|||
|
||||
define void @uitofp_2i32_2f64() #0 {
|
||||
; SSE-LABEL: @uitofp_2i32_2f64(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
|
||||
; SSE-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
|
||||
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||
; SSE-NEXT: [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||
; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX1-LABEL: @uitofp_2i32_2f64(
|
||||
|
@ -173,18 +170,12 @@ define void @uitofp_2i32_2f64() #0 {
|
|||
|
||||
define void @uitofp_4i32_4f64() #0 {
|
||||
; SSE-LABEL: @uitofp_4i32_4f64(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
|
||||
; SSE-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
|
||||
; SSE-NEXT: [[CVT2:%.*]] = uitofp i32 [[LD2]] to double
|
||||
; SSE-NEXT: [[CVT3:%.*]] = uitofp i32 [[LD3]] to double
|
||||
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
||||
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
|
||||
; SSE-NEXT: [[TMP3:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP4:%.*]] = uitofp <2 x i32> [[TMP2]] to <2 x double>
|
||||
; SSE-NEXT: store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||
; SSE-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @uitofp_4i32_4f64(
|
||||
|
@ -210,30 +201,18 @@ define void @uitofp_4i32_4f64() #0 {
|
|||
|
||||
define void @uitofp_8i32_8f64() #0 {
|
||||
; SSE-LABEL: @uitofp_8i32_8f64(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
|
||||
; SSE-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
|
||||
; SSE-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
|
||||
; SSE-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
|
||||
; SSE-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
|
||||
; SSE-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
|
||||
; SSE-NEXT: [[CVT2:%.*]] = uitofp i32 [[LD2]] to double
|
||||
; SSE-NEXT: [[CVT3:%.*]] = uitofp i32 [[LD3]] to double
|
||||
; SSE-NEXT: [[CVT4:%.*]] = uitofp i32 [[LD4]] to double
|
||||
; SSE-NEXT: [[CVT5:%.*]] = uitofp i32 [[LD5]] to double
|
||||
; SSE-NEXT: [[CVT6:%.*]] = uitofp i32 [[LD6]] to double
|
||||
; SSE-NEXT: [[CVT7:%.*]] = uitofp i32 [[LD7]] to double
|
||||
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
|
||||
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
|
||||
; SSE-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
|
||||
; SSE-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
|
||||
; SSE-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
|
||||
; SSE-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
|
||||
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <2 x i32>*), align 16
|
||||
; SSE-NEXT: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6) to <2 x i32>*), align 8
|
||||
; SSE-NEXT: [[TMP5:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP6:%.*]] = uitofp <2 x i32> [[TMP2]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP7:%.*]] = uitofp <2 x i32> [[TMP3]] to <2 x double>
|
||||
; SSE-NEXT: [[TMP8:%.*]] = uitofp <2 x i32> [[TMP4]] to <2 x double>
|
||||
; SSE-NEXT: store <2 x double> [[TMP5]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||
; SSE-NEXT: store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
|
||||
; SSE-NEXT: store <2 x double> [[TMP7]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
|
||||
; SSE-NEXT: store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX256-LABEL: @uitofp_8i32_8f64(
|
||||
|
|
Loading…
Reference in New Issue