; forked from OSchip/llvm-project -- LLVM IR test file, 92 lines, 3.9 KiB
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
|
|
|
|
; Loads the <2 x double> vectors at indices 2 and 3 of %ptr and concatenates
; them with an identity shuffle mask. The assertions below expect the pair to
; be merged into one 256-bit unaligned load at byte offset 32.
define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_2f64_23:
; AVX: # BB#0:
; AVX-NEXT: vmovups 32(%rdi), %ymm0
; AVX-NEXT: retq
  %lo.addr = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %hi.addr = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
  %lo = load <2 x double>, <2 x double>* %lo.addr
  %hi = load <2 x double>, <2 x double>* %hi.addr
  %merged = shufflevector <2 x double> %lo, <2 x double> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %merged
}
|
|
|
|
; Loads the <2 x double> vector at index 2 of %ptr into the low half of the
; result and fills the high half from zeroinitializer. The assertions below
; expect a 128-bit load followed by an explicit zero of the upper lane.
define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_2f64_2z:
; AVX: # BB#0:
; AVX-NEXT: vmovaps 32(%rdi), %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %lo.addr = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %lo = load <2 x double>, <2 x double>* %lo.addr
  %widened = shufflevector <2 x double> %lo, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %widened
}
|
|
|
|
; Loads the four scalar doubles at indices 2, 3, 4 and 5 of %ptr and inserts
; them into lanes 0..3 of the result in that order. The assertions below
; expect the four loads to collapse into one 256-bit unaligned load at byte
; offset 16.
define <4 x double> @merge_4f64_f64_2345(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_2345:
; AVX: # BB#0:
; AVX-NEXT: vmovups 16(%rdi), %ymm0
; AVX-NEXT: retq
  %addr2 = getelementptr inbounds double, double* %ptr, i64 2
  %addr3 = getelementptr inbounds double, double* %ptr, i64 3
  %addr4 = getelementptr inbounds double, double* %ptr, i64 4
  %addr5 = getelementptr inbounds double, double* %ptr, i64 5
  %elt2 = load double, double* %addr2
  %elt3 = load double, double* %addr3
  %elt4 = load double, double* %addr4
  %elt5 = load double, double* %addr5
  %vec.a = insertelement <4 x double> undef, double %elt2, i32 0
  %vec.b = insertelement <4 x double> %vec.a, double %elt3, i32 1
  %vec.c = insertelement <4 x double> %vec.b, double %elt4, i32 2
  %vec.d = insertelement <4 x double> %vec.c, double %elt5, i32 3
  ret <4 x double> %vec.d
}
|
|
|
|
; Puts the double at index 3 of %ptr in lane 0 and the constant zero in
; lane 1; lanes 2 and 3 are left undef. The assertions below expect a single
; scalar load that zeroes the element above it.
define <4 x double> @merge_4f64_f64_3zuu(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_3zuu:
; AVX: # BB#0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
  %addr3 = getelementptr inbounds double, double* %ptr, i64 3
  %elt3 = load double, double* %addr3
  %vec.a = insertelement <4 x double> undef, double %elt3, i32 0
  %vec.b = insertelement <4 x double> %vec.a, double 0.000000e+00, i32 1
  ret <4 x double> %vec.b
}
|
|
|
|
; Puts the doubles at indices 3 and 4 of %ptr in lanes 0 and 1; lanes 2 and 3
; are left undef. The assertions below expect the two scalar loads to merge
; into one 128-bit unaligned load at byte offset 24.
define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_34uu:
; AVX: # BB#0:
; AVX-NEXT: vmovups 24(%rdi), %xmm0
; AVX-NEXT: retq
  %addr3 = getelementptr inbounds double, double* %ptr, i64 3
  %addr4 = getelementptr inbounds double, double* %ptr, i64 4
  %elt3 = load double, double* %addr3
  %elt4 = load double, double* %addr4
  %vec.a = insertelement <4 x double> undef, double %elt3, i32 0
  %vec.b = insertelement <4 x double> %vec.a, double %elt4, i32 1
  ret <4 x double> %vec.b
}
|
|
|
|
; Inserts the doubles at indices 4 and 5 of %ptr into lanes 0 and 1 of an
; all-zero vector, so lanes 2 and 3 stay zero. The assertions below expect
; one 128-bit unaligned load at byte offset 32 plus an explicit zero of the
; upper lane.
define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_45zz:
; AVX: # BB#0:
; AVX-NEXT: vmovups 32(%rdi), %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %addr4 = getelementptr inbounds double, double* %ptr, i64 4
  %addr5 = getelementptr inbounds double, double* %ptr, i64 5
  %elt4 = load double, double* %addr4
  %elt5 = load double, double* %addr5
  %vec.a = insertelement <4 x double> zeroinitializer, double %elt4, i32 0
  %vec.b = insertelement <4 x double> %vec.a, double %elt5, i32 1
  ret <4 x double> %vec.b
}
|