llvm-project/llvm/test/CodeGen/X86/sse-cvttp2si.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.1           | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx              | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512

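; PR37751 - https://bugs.llvm.org/show_bug.cgi?id=37751
; We can't combine a cvtt* intrinsic followed by sitofp into a 'round'
; instruction because the behavior is different for out-of-range values: the
; x86 truncating converts return the "integer indefinite" value (0x80000000,
; or 0x8000000000000000 for a 64-bit result) when the input is NaN or does not
; fit in the destination, whereas 'round' keeps a large input as-is and
; propagates NaN.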

declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>)
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)

; Scalar f32 -> i32 -> f32 round trip; the source vector is loaded from memory.
; The truncation goes through the x86 cvttss2si intrinsic rather than a plain
; fptosi, so the expected code is a truncating convert (with the load folded
; into its memory operand) followed by a separate int-to-float convert.
define float @float_to_int_to_float_mem_f32_i32(<4 x float>* %p) {
; SSE-LABEL: float_to_int_to_float_mem_f32_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si (%rdi), %eax
; SSE-NEXT:    cvtsi2ssl %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f32_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si (%rdi), %eax
; AVX-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  ; Full 16-byte-aligned vector load; the intrinsic takes the whole <4 x float>.
  %x = load <4 x float>, <4 x float>* %p, align 16
  %fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)
  %sitofp = sitofp i32 %fptosi to float
  ret float %sitofp
}

; Scalar f32 -> i32 -> f32 round trip; the source vector arrives in a register.
; Expected code keeps the truncating convert and the int-to-float convert as
; two separate instructions. The SSE4.1 expectations zero %xmm0 before the
; cvtsi2ss (presumably to break the false dependency on its destination); the
; AVX expectations instead name %xmm1 as the pass-through source operand.
define float @float_to_int_to_float_reg_f32_i32(<4 x float> %x) {
; SSE-LABEL: float_to_int_to_float_reg_f32_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ssl %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f32_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)
  %sitofp = sitofp i32 %fptosi to float
  ret float %sitofp
}

; Scalar f32 -> i64 -> f32 round trip; the source vector is loaded from memory.
; Uses the 64-bit-result intrinsic (cvttss2si64), so the expected converts
; target %rax and the int-to-float step is the 'q' (64-bit source) form.
define float @float_to_int_to_float_mem_f32_i64(<4 x float>* %p) {
; SSE-LABEL: float_to_int_to_float_mem_f32_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si (%rdi), %rax
; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f32_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si (%rdi), %rax
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX-NEXT:    retq
  ; Full 16-byte-aligned vector load; the intrinsic takes the whole <4 x float>.
  %x = load <4 x float>, <4 x float>* %p, align 16
  %fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)
  %sitofp = sitofp i64 %fptosi to float
  ret float %sitofp
}

; Scalar f32 -> i64 -> f32 round trip; the source vector arrives in a register.
; Same shape as the i32 register variant, but with the 64-bit-result intrinsic:
; the expected converts go through %rax and use the 'q' int-to-float form.
define float @float_to_int_to_float_reg_f32_i64(<4 x float> %x) {
; SSE-LABEL: float_to_int_to_float_reg_f32_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f32_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)
  %sitofp = sitofp i64 %fptosi to float
  ret float %sitofp
}

; Scalar f64 -> i32 -> f64 round trip; the source vector is loaded from memory.
; The truncation goes through the x86 cvttsd2si intrinsic, and the expected
; code is a truncating convert (load folded into its memory operand) followed
; by a separate int-to-double convert.
define double @float_to_int_to_float_mem_f64_i32(<2 x double>* %p) {
; SSE-LABEL: float_to_int_to_float_mem_f64_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si (%rdi), %eax
; SSE-NEXT:    cvtsi2sdl %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f64_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si (%rdi), %eax
; AVX-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  ; Full 16-byte-aligned vector load; the intrinsic takes the whole <2 x double>.
  %x = load <2 x double>, <2 x double>* %p, align 16
  %fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)
  %sitofp = sitofp i32 %fptosi to double
  ret double %sitofp
}

; Scalar f64 -> i32 -> f64 round trip; the source vector arrives in a register.
; Expected code keeps the truncating convert and the int-to-double convert as
; two separate instructions. The SSE4.1 expectations zero %xmm0 before the
; cvtsi2sd (presumably to break the false dependency on its destination); the
; AVX expectations instead name %xmm1 as the pass-through source operand.
define double @float_to_int_to_float_reg_f64_i32(<2 x double> %x) {
; SSE-LABEL: float_to_int_to_float_reg_f64_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sdl %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f64_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si %xmm0, %eax
; AVX-NEXT:    vcvtsi2sdl %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)
  %sitofp = sitofp i32 %fptosi to double
  ret double %sitofp
}

; Scalar f64 -> i64 -> f64 round trip; the source vector is loaded from memory.
; Uses the 64-bit-result intrinsic (cvttsd2si64), so the expected converts
; target %rax and the int-to-double step is the 'q' (64-bit source) form.
define double @float_to_int_to_float_mem_f64_i64(<2 x double>* %p) {
; SSE-LABEL: float_to_int_to_float_mem_f64_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si (%rdi), %rax
; SSE-NEXT:    cvtsi2sdq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f64_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si (%rdi), %rax
; AVX-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm0
; AVX-NEXT:    retq
  ; Full 16-byte-aligned vector load; the intrinsic takes the whole <2 x double>.
  %x = load <2 x double>, <2 x double>* %p, align 16
  %fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)
  %sitofp = sitofp i64 %fptosi to double
  ret double %sitofp
}

; Scalar f64 -> i64 -> f64 round trip; the source vector arrives in a register.
; Same shape as the i32 register variant, but with the 64-bit-result intrinsic:
; the expected converts go through %rax and use the 'q' int-to-double form.
define double @float_to_int_to_float_reg_f64_i64(<2 x double> %x) {
; SSE-LABEL: float_to_int_to_float_reg_f64_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sdq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f64_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)
  %sitofp = sitofp i64 %fptosi to double
  ret double %sitofp
}

; Packed v4f32 -> v4i32 -> v4f32 round trip; the source vector is loaded from
; memory and truncated with the x86 cvttps2dq intrinsic.
; NOTE(review): the assertions below expect a single roundps with immediate 11
; (round toward zero, precision exception suppressed). That is exactly the
; 'round' combine the comment at the top of this file says is not valid for
; out-of-range inputs, so these checks appear to record the current
; (miscompiled) output as a baseline for PR37751 rather than the desired code.
; Regenerate them with update_llc_test_checks.py once the combine is fixed;
; do not hand-edit the autogenerated lines.
define <4 x float> @float_to_int_to_float_mem_v4f32(<4 x float>* %p) {
; SSE-LABEL: float_to_int_to_float_mem_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    roundps $11, (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundps $11, (%rdi), %xmm0
; AVX-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p, align 16
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
  %sitofp = sitofp <4 x i32> %fptosi to <4 x float>
  ret <4 x float> %sitofp
}

; Packed v4f32 -> v4i32 -> v4f32 round trip; the source vector arrives in a
; register and is truncated with the x86 cvttps2dq intrinsic.
; NOTE(review): the assertions below expect a single roundps with immediate 11
; (round toward zero, precision exception suppressed). That is exactly the
; 'round' combine the comment at the top of this file says is not valid for
; out-of-range inputs, so these checks appear to record the current
; (miscompiled) output as a baseline for PR37751 rather than the desired code.
; Regenerate them with update_llc_test_checks.py once the combine is fixed;
; do not hand-edit the autogenerated lines.
define <4 x float> @float_to_int_to_float_reg_v4f32(<4 x float> %x) {
; SSE-LABEL: float_to_int_to_float_reg_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    roundps $11, %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
  %sitofp = sitofp <4 x i32> %fptosi to <4 x float>
  ret <4 x float> %sitofp
}

; Packed v2f64 -> i32 lanes -> v2f64 round trip; the source vector is loaded
; from memory. The cvttpd2dq intrinsic returns <4 x i32>; only lanes 0 and 1
; are converted back. Expected code keeps the truncating convert (load folded
; into its memory operand) and the int-to-double convert as two instructions.
; In the AVX expectations the memory form is spelled with an 'x' suffix, which
; marks the 128-bit source width.
define <2 x double> @float_to_int_to_float_mem_v2f64(<2 x double>* %p) {
; SSE-LABEL: float_to_int_to_float_mem_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq (%rdi), %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dqx (%rdi), %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %p, align 16
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %x)
  ; Despite its name, %concat extracts the low two lanes of the 4-lane result.
  %concat = shufflevector <4 x i32> %fptosi, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sitofp = sitofp <2 x i32> %concat to <2 x double>
  ret <2 x double> %sitofp
}

; Packed v2f64 -> i32 lanes -> v2f64 round trip; the source vector arrives in
; a register. The cvttpd2dq intrinsic returns <4 x i32>; only lanes 0 and 1 are
; converted back. Expected code keeps the truncating convert and the
; int-to-double convert as two separate instructions.
define <2 x double> @float_to_int_to_float_reg_v2f64(<2 x double> %x) {
; SSE-LABEL: float_to_int_to_float_reg_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %x)
  ; Despite its name, %concat extracts the low two lanes of the 4-lane result.
  %concat = shufflevector <4 x i32> %fptosi, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sitofp = sitofp <2 x i32> %concat to <2 x double>
  ret <2 x double> %sitofp
}
[x86] add tests for potentially miscompiling cvttp2si (PR37751); NFC llvm-svn: 334367 2018-06-11 01:42:12 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.1 \| FileCheck %s --check-prefixes=SSE`
			`; RUN: llc < %s -mtriple=x86_64-- -mattr=avx \| FileCheck %s --check-prefixes=AVX,AVX1`
			`; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl \| FileCheck %s --check-prefixes=AVX,AVX512`

[x86] add scalar cvtt intrinsic tests; NFC More coverage for the problem noted in D47993 (although these shouldn't be affected by that patch). llvm-svn: 334404 2018-06-11 21:51:34 +08:00			`; PR37751 - https://bugs.llvm.org/show_bug.cgi?id=37751`
[x86] add tests for potentially miscompiling cvttp2si (PR37751); NFC llvm-svn: 334367 2018-06-11 01:42:12 +08:00			`; We can't combine into 'round' instructions because the behavior is different for out-of-range values.`

			`declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)`
			`declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>)`
[x86] add scalar cvtt intrinsic tests; NFC More coverage for the problem noted in D47993 (although these shouldn't be affected by that patch). llvm-svn: 334404 2018-06-11 21:51:34 +08:00			`declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)`
			`declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)`
			`declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)`
			`declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)`

			`define float @float_to_int_to_float_mem_f32_i32(<4 x float>* %p) {`
			`; SSE-LABEL: float_to_int_to_float_mem_f32_i32:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: cvttss2si (%rdi), %eax`
			`; SSE-NEXT: cvtsi2ssl %eax, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: float_to_int_to_float_mem_f32_i32:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vcvttss2si (%rdi), %eax`
			`; AVX-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%x = load <4 x float>, <4 x float>* %p, align 16`
			`%fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)`
			`%sitofp = sitofp i32 %fptosi to float`
			`ret float %sitofp`
			`}`

			`define float @float_to_int_to_float_reg_f32_i32(<4 x float> %x) {`
			`; SSE-LABEL: float_to_int_to_float_reg_f32_i32:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: cvttss2si %xmm0, %eax`
			`; SSE-NEXT: xorps %xmm0, %xmm0`
			`; SSE-NEXT: cvtsi2ssl %eax, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: float_to_int_to_float_reg_f32_i32:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vcvttss2si %xmm0, %eax`
			`; AVX-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0`
			`; AVX-NEXT: retq`
			`%fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)`
			`%sitofp = sitofp i32 %fptosi to float`
			`ret float %sitofp`
			`}`

			`define float @float_to_int_to_float_mem_f32_i64(<4 x float>* %p) {`
			`; SSE-LABEL: float_to_int_to_float_mem_f32_i64:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: cvttss2si (%rdi), %rax`
			`; SSE-NEXT: cvtsi2ssq %rax, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: float_to_int_to_float_mem_f32_i64:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vcvttss2si (%rdi), %rax`
			`; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%x = load <4 x float>, <4 x float>* %p, align 16`
			`%fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)`
			`%sitofp = sitofp i64 %fptosi to float`
			`ret float %sitofp`
			`}`

			`define float @float_to_int_to_float_reg_f32_i64(<4 x float> %x) {`
			`; SSE-LABEL: float_to_int_to_float_reg_f32_i64:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: cvttss2si %xmm0, %rax`
			`; SSE-NEXT: xorps %xmm0, %xmm0`
			`; SSE-NEXT: cvtsi2ssq %rax, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: float_to_int_to_float_reg_f32_i64:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vcvttss2si %xmm0, %rax`
			`; AVX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm0`
			`; AVX-NEXT: retq`
			`%fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)`
			`%sitofp = sitofp i64 %fptosi to float`
			`ret float %sitofp`
			`}`

			`define double @float_to_int_to_float_mem_f64_i32(<2 x double>* %p) {`
			`; SSE-LABEL: float_to_int_to_float_mem_f64_i32:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: cvttsd2si (%rdi), %eax`
			`; SSE-NEXT: cvtsi2sdl %eax, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: float_to_int_to_float_mem_f64_i32:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vcvttsd2si (%rdi), %eax`
			`; AVX-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%x = load <2 x double>, <2 x double>* %p, align 16`
			`%fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)`
			`%sitofp = sitofp i32 %fptosi to double`
			`ret double %sitofp`
			`}`

			`define double @float_to_int_to_float_reg_f64_i32(<2 x double> %x) {`
			`; SSE-LABEL: float_to_int_to_float_reg_f64_i32:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: cvttsd2si %xmm0, %eax`
			`; SSE-NEXT: xorps %xmm0, %xmm0`
			`; SSE-NEXT: cvtsi2sdl %eax, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: float_to_int_to_float_reg_f64_i32:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vcvttsd2si %xmm0, %eax`
			`; AVX-NEXT: vcvtsi2sdl %eax, %xmm1, %xmm0`
			`; AVX-NEXT: retq`
			`%fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)`
			`%sitofp = sitofp i32 %fptosi to double`
			`ret double %sitofp`
			`}`

			`define double @float_to_int_to_float_mem_f64_i64(<2 x double>* %p) {`
			`; SSE-LABEL: float_to_int_to_float_mem_f64_i64:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: cvttsd2si (%rdi), %rax`
			`; SSE-NEXT: cvtsi2sdq %rax, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: float_to_int_to_float_mem_f64_i64:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vcvttsd2si (%rdi), %rax`
			`; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%x = load <2 x double>, <2 x double>* %p, align 16`
			`%fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)`
			`%sitofp = sitofp i64 %fptosi to double`
			`ret double %sitofp`
			`}`

			`define double @float_to_int_to_float_reg_f64_i64(<2 x double> %x) {`
			`; SSE-LABEL: float_to_int_to_float_reg_f64_i64:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: cvttsd2si %xmm0, %rax`
			`; SSE-NEXT: xorps %xmm0, %xmm0`
			`; SSE-NEXT: cvtsi2sdq %rax, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: float_to_int_to_float_reg_f64_i64:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vcvttsd2si %xmm0, %rax`
			`; AVX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm0`
			`; AVX-NEXT: retq`
			`%fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)`
			`%sitofp = sitofp i64 %fptosi to double`
			`ret double %sitofp`
			`}`
[x86] add tests for potentially miscompiling cvttp2si (PR37751); NFC llvm-svn: 334367 2018-06-11 01:42:12 +08:00
			`define <4 x float> @float_to_int_to_float_mem_v4f32(<4 x float>* %p) {`
			`; SSE-LABEL: float_to_int_to_float_mem_v4f32:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: roundps $11, (%rdi), %xmm0`
			`; SSE-NEXT: retq`
			`;`
[X86] Add isel patterns for folding loads when creating ROUND instructions from ffloor/fnearbyint/fceil/frint/ftrunc. We were missing packed isel folding patterns for all of sse41, avx, and avx512. For some reason avx512 had scalar load folding patterns under optsize(due to partial/undef reg update), but we didn't have the equivalent sse41 and avx patterns. Sometimes we would get load folding due to peephole pass anyway, but we're also missing avx512 instructions from the load folding table. I'll try to fix that in another patch. Some of this was spotted in the review for D47993. This patch adds all the folds to isel, adds a few spot tests, and disables the peephole pass on a few tests to ensure we're testing some of these patterns. llvm-svn: 334460 2018-06-12 08:48:57 +08:00			`; AVX-LABEL: float_to_int_to_float_mem_v4f32:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vroundps $11, (%rdi), %xmm0`
			`; AVX-NEXT: retq`
[x86] add tests for potentially miscompiling cvttp2si (PR37751); NFC llvm-svn: 334367 2018-06-11 01:42:12 +08:00			`%x = load <4 x float>, <4 x float>* %p, align 16`
			`%fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)`
			`%sitofp = sitofp <4 x i32> %fptosi to <4 x float>`
			`ret <4 x float> %sitofp`
			`}`

			`define <4 x float> @float_to_int_to_float_reg_v4f32(<4 x float> %x) {`
			`; SSE-LABEL: float_to_int_to_float_reg_v4f32:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: roundps $11, %xmm0, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: float_to_int_to_float_reg_v4f32:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vroundps $11, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)`
			`%sitofp = sitofp <4 x i32> %fptosi to <4 x float>`
			`ret <4 x float> %sitofp`
			`}`

			`define <2 x double> @float_to_int_to_float_mem_v2f64(<2 x double>* %p) {`
			`; SSE-LABEL: float_to_int_to_float_mem_v2f64:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: cvttpd2dq (%rdi), %xmm0`
			`; SSE-NEXT: cvtdq2pd %xmm0, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: float_to_int_to_float_mem_v2f64:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vcvttpd2dqx (%rdi), %xmm0`
			`; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%x = load <2 x double>, <2 x double>* %p, align 16`
			`%fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %x)`
			`%concat = shufflevector <4 x i32> %fptosi, <4 x i32> undef, <2 x i32> <i32 0, i32 1>`
			`%sitofp = sitofp <2 x i32> %concat to <2 x double>`
			`ret <2 x double> %sitofp`
			`}`

			`define <2 x double> @float_to_int_to_float_reg_v2f64(<2 x double> %x) {`
			`; SSE-LABEL: float_to_int_to_float_reg_v2f64:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: cvttpd2dq %xmm0, %xmm0`
			`; SSE-NEXT: cvtdq2pd %xmm0, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: float_to_int_to_float_reg_v2f64:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0`
			`; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %x)`
			`%concat = shufflevector <4 x i32> %fptosi, <4 x i32> undef, <2 x i32> <i32 0, i32 1>`
			`%sitofp = sitofp <2 x i32> %concat to <2 x double>`
			`ret <2 x double> %sitofp`
			`}`