; llvm-project/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll
;
; 92 lines
; 3.9 KiB
; LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; Concatenate the two adjacent <2 x double> elements at indices 2 and 3 of
; %ptr into one <4 x double>. The assertions below expect the pair of 128-bit
; loads to be merged into a single unaligned 256-bit load from byte offset 32.
define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_2f64_23:
; AVX: # BB#0:
; AVX-NEXT: vmovups 32(%rdi), %ymm0
; AVX-NEXT: retq
  ; Addresses of the low and high 128-bit halves.
  %lo.addr = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %hi.addr = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
  %lo = load <2 x double>, <2 x double>* %lo.addr
  %hi = load <2 x double>, <2 x double>* %hi.addr
  ; Identity mask over both inputs: result is <lo[0], lo[1], hi[0], hi[1]>.
  %merged = shufflevector <2 x double> %lo, <2 x double> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %merged
}
; Build a <4 x double> whose low half is the <2 x double> element at index 2
; of %ptr and whose high half is zero. The assertions below expect an aligned
; 128-bit load, a zeroed xmm register, and a vinsertf128 to combine them.
define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_2f64_2z:
; AVX: # BB#0:
; AVX-NEXT: vmovaps 32(%rdi), %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %lo.addr = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %lo = load <2 x double>, <2 x double>* %lo.addr
  ; Mask indices 2 and 3 select from the zeroinitializer operand.
  %widened = shufflevector <2 x double> %lo, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %widened
}
; Assemble a <4 x double> from four scalar loads of the consecutive doubles at
; indices 2, 3, 4 and 5 of %ptr, placed in lanes 0 through 3. The assertions
; below expect them to merge into one unaligned 256-bit load from byte offset 16.
define <4 x double> @merge_4f64_f64_2345(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_2345:
; AVX: # BB#0:
; AVX-NEXT: vmovups 16(%rdi), %ymm0
; AVX-NEXT: retq
  ; Element addresses, in increasing memory order.
  %addr2 = getelementptr inbounds double, double* %ptr, i64 2
  %addr3 = getelementptr inbounds double, double* %ptr, i64 3
  %addr4 = getelementptr inbounds double, double* %ptr, i64 4
  %addr5 = getelementptr inbounds double, double* %ptr, i64 5
  %elt2 = load double, double* %addr2
  %elt3 = load double, double* %addr3
  %elt4 = load double, double* %addr4
  %elt5 = load double, double* %addr5
  ; Fill lanes 0..3 one at a time, starting from an undef vector.
  %vec.a = insertelement <4 x double> undef, double %elt2, i32 0
  %vec.b = insertelement <4 x double> %vec.a, double %elt3, i32 1
  %vec.c = insertelement <4 x double> %vec.b, double %elt4, i32 2
  %vec.d = insertelement <4 x double> %vec.c, double %elt5, i32 3
  ret <4 x double> %vec.d
}
; Build a <4 x double> with the double at index 3 of %ptr in lane 0, the
; constant 0.0 in lane 1, and lanes 2 and 3 left undef. The assertions below
; expect a single vmovsd load that zeroes the second element.
define <4 x double> @merge_4f64_f64_3zuu(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_3zuu:
; AVX: # BB#0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
  %addr3 = getelementptr inbounds double, double* %ptr, i64 3
  %elt3 = load double, double* %addr3
  %vec.a = insertelement <4 x double> undef, double %elt3, i32 0
  ; Lane 1 is an explicit zero; the upper two lanes stay undef.
  %vec.b = insertelement <4 x double> %vec.a, double 0.0, i32 1
  ret <4 x double> %vec.b
}
; Build a <4 x double> with the consecutive doubles at indices 3 and 4 of %ptr
; in lanes 0 and 1, leaving lanes 2 and 3 undef. The assertions below expect
; the two scalar loads to merge into one unaligned 128-bit load from byte
; offset 24.
define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_34uu:
; AVX: # BB#0:
; AVX-NEXT: vmovups 24(%rdi), %xmm0
; AVX-NEXT: retq
  %addr3 = getelementptr inbounds double, double* %ptr, i64 3
  %addr4 = getelementptr inbounds double, double* %ptr, i64 4
  %elt3 = load double, double* %addr3
  %elt4 = load double, double* %addr4
  ; Only the low two lanes are defined.
  %vec.a = insertelement <4 x double> undef, double %elt3, i32 0
  %vec.b = insertelement <4 x double> %vec.a, double %elt4, i32 1
  ret <4 x double> %vec.b
}
; Build a <4 x double> with the consecutive doubles at indices 4 and 5 of %ptr
; in lanes 0 and 1 and zeros in lanes 2 and 3. The assertions below expect one
; unaligned 128-bit load from byte offset 32, a zeroed xmm register, and a
; vinsertf128 that supplies the zero upper half.
define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_45zz:
; AVX: # BB#0:
; AVX-NEXT: vmovups 32(%rdi), %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %addr4 = getelementptr inbounds double, double* %ptr, i64 4
  %addr5 = getelementptr inbounds double, double* %ptr, i64 5
  %elt4 = load double, double* %addr4
  %elt5 = load double, double* %addr5
  ; Start from an all-zero vector so the untouched upper lanes remain zero.
  %vec.a = insertelement <4 x double> zeroinitializer, double %elt4, i32 0
  %vec.b = insertelement <4 x double> %vec.a, double %elt5, i32 1
  ret <4 x double> %vec.b
}