2015-12-07 22:33:34 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2015-12-01 20:35:03 +08:00
|
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=AVX512DQ %s
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq -mattr=+avx512bw -mattr=+avx512vl| FileCheck --check-prefix=CHECK --check-prefix=SKX %s
|
2013-08-19 21:26:14 +08:00
|
|
|
|
|
|
|
; Register-register add of two <8 x double> values; the expected assembly is
; a single vaddpd on zmm registers.
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sum = fadd <8 x double> %x, %y
  ret <8 x double> %sum
}
|
|
|
|
|
|
|
|
; Add of a constant <8 x double> vector; the constant is expected to appear as
; a RIP-relative memory operand of vaddpd.
define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %sum = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %sum
}
|
|
|
|
|
|
|
|
; Register-register add of two <16 x float> values; the expected assembly is
; a single vaddps on zmm registers.
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sum = fadd <16 x float> %x, %y
  ret <16 x float> %sum
}
|
|
|
|
|
|
|
|
; Add of a constant <16 x float> vector; the constant is expected to appear as
; a RIP-relative memory operand of vaddps.
define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %sum = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %sum
}
|
|
|
|
|
|
|
|
; Register-register subtract (%x - %y) of <8 x double> values; the expected
; assembly is a single vsubpd on zmm registers.
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %diff = fsub <8 x double> %x, %y
  ret <8 x double> %diff
}
|
|
|
|
|
|
|
|
; Subtract of a vector loaded from %x; the load is expected to be folded into
; the memory operand of vsubpd.
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %rhs = load <8 x double>, <8 x double>* %x, align 8
  %diff = fsub <8 x double> %y, %rhs
  ret <8 x double> %diff
}
|
|
|
|
|
|
|
|
; Register-register subtract (%x - %y) of <16 x float> values; the expected
; assembly is a single vsubps on zmm registers.
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %diff = fsub <16 x float> %x, %y
  ret <16 x float> %diff
}
|
|
|
|
|
|
|
|
; Subtract of a vector loaded from %x; the load is expected to be folded into
; the memory operand of vsubps.
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %rhs = load <16 x float>, <16 x float>* %x, align 4
  %diff = fsub <16 x float> %y, %rhs
  ret <16 x float> %diff
}
|
|
|
|
|
2013-10-21 21:27:34 +08:00
|
|
|
; Multiply of <8 x i64> values. The expectations differ by feature set: the
; runs without the dq feature expect a vpmuludq / shift / vpaddq expansion,
; while the runs with dq expect a single vpmullq.
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT: vpsllq $32, %zmm3, %zmm3
; AVX512F-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm1
; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: vpsllq $32, %zmm0, %zmm0
; AVX512F-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
; AVX512VL-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT: vpsllq $32, %zmm3, %zmm3
; AVX512VL-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT: vpsrlq $32, %zmm1, %zmm1
; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT: vpsllq $32, %zmm0, %zmm0
; AVX512VL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT: vpsllq $32, %zmm3, %zmm3
; AVX512BW-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm1
; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpsllq $32, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: imulq512:
; SKX: ## BB#0:
; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
  %product = mul <8 x i64> %x, %y
  ret <8 x i64> %product
}
|
|
|
|
|
2013-08-19 21:26:14 +08:00
|
|
|
; Register-register multiply of two <8 x double> values; the expected assembly
; is a single vmulpd on zmm registers.
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %product = fmul <8 x double> %x, %y
  ret <8 x double> %product
}
|
|
|
|
|
|
|
|
; Multiply by a constant <8 x double> vector; the constant is expected to
; appear as a RIP-relative memory operand of vmulpd.
define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %product = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %product
}
|
|
|
|
|
|
|
|
; Register-register multiply of two <16 x float> values; the expected assembly
; is a single vmulps on zmm registers.
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %product = fmul <16 x float> %x, %y
  ret <16 x float> %product
}
|
|
|
|
|
|
|
|
; Multiply by a constant <16 x float> vector; the constant is expected to
; appear as a RIP-relative memory operand of vmulps.
define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %product = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %product
}
|
|
|
|
|
|
|
|
; Register-register divide (%x / %y) of <8 x double> values; the expected
; assembly is a single vdivpd on zmm registers.
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %quot = fdiv <8 x double> %x, %y
  ret <8 x double> %quot
}
|
|
|
|
|
|
|
|
; Divide by a constant <8 x double> vector; the constant is expected to appear
; as a RIP-relative memory operand of vdivpd.
define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %quot = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %quot
}
|
|
|
|
|
|
|
|
; Register-register divide (%x / %y) of <16 x float> values; the expected
; assembly is a single vdivps on zmm registers.
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %quot = fdiv <16 x float> %x, %y
  ret <16 x float> %quot
}
|
|
|
|
|
|
|
|
; Divide by a constant <16 x float> vector; the constant is expected to appear
; as a RIP-relative memory operand of vdivps.
define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %quot = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %quot
}
|
|
|
|
|
|
|
|
; Register-register add of two <8 x i64> values; the expected assembly is a
; single vpaddq on zmm registers.
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %sum = add <8 x i64> %i, %j
  ret <8 x i64> %sum
}
|
|
|
|
|
2014-03-27 17:45:08 +08:00
|
|
|
; Add of a vector loaded from %j; the load is expected to be folded into the
; memory operand of vpaddq.
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %mem = load <8 x i64>, <8 x i64>* %j, align 4
  %sum = add <8 x i64> %i, %mem
  ret <8 x i64> %sum
}
|
|
|
|
|
|
|
|
; Add of an all-ones-valued (i64 1 in every lane) constant; the expected
; assembly uses a {1to8} broadcast memory operand on vpaddq.
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %sum = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %sum
}
|
|
|
|
|
|
|
|
; Loads one i64 from %j, inserts it into every lane of an <8 x i64> with a
; chain of insertelement instructions, and adds the result to %i. The expected
; assembly is a vpaddq with a {1to8} broadcast of the memory operand.
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %scalar = load i64, i64* %j
  %splat0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %splat1 = insertelement <8 x i64> %splat0, i64 %scalar, i32 1
  %splat2 = insertelement <8 x i64> %splat1, i64 %scalar, i32 2
  %splat3 = insertelement <8 x i64> %splat2, i64 %scalar, i32 3
  %splat4 = insertelement <8 x i64> %splat3, i64 %scalar, i32 4
  %splat5 = insertelement <8 x i64> %splat4, i64 %scalar, i32 5
  %splat6 = insertelement <8 x i64> %splat5, i64 %scalar, i32 6
  %splat7 = insertelement <8 x i64> %splat6, i64 %scalar, i32 7
  %sum = add <8 x i64> %i, %splat7
  ret <8 x i64> %sum
}
|
|
|
|
|
2013-08-19 21:26:14 +08:00
|
|
|
; Register-register add of two <16 x i32> values; the expected assembly is a
; single vpaddd on zmm registers.
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %sum = add <16 x i32> %i, %j
  ret <16 x i32> %sum
}
|
|
|
|
|
2014-03-27 17:45:08 +08:00
|
|
|
; Add of a vector loaded from %j; the load is expected to be folded into the
; memory operand of vpaddd.
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %mem = load <16 x i32>, <16 x i32>* %j, align 4
  %sum = add <16 x i32> %i, %mem
  ret <16 x i32> %sum
}
|
|
|
|
|
|
|
|
; Add of a constant with i32 1 in every lane; the expected assembly uses a
; {1to16} broadcast memory operand on vpaddd.
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %sum = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %sum
}
|
|
|
|
|
|
|
|
; Add whose result is selected per lane by (%mask1 != 0), keeping %i in the
; other lanes. The expected assembly builds %k1 with vpcmpneqd against zero
; and predicates vpaddd on it.
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %lanes = icmp ne <16 x i32> %mask1, zeroinitializer
  %sum = add <16 x i32> %i, %j
  %res = select <16 x i1> %lanes, <16 x i32> %sum, <16 x i32> %i
  ret <16 x i32> %res
}
|
|
|
|
|
|
|
|
; Add whose result is selected per lane by (%mask1 != 0), with zero in the
; other lanes. The expected assembly predicates vpaddd on %k1 with the {z}
; modifier.
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %lanes = icmp ne <16 x i32> %mask1, zeroinitializer
  %sum = add <16 x i32> %i, %j
  %res = select <16 x i1> %lanes, <16 x i32> %sum, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}
|
|
|
|
|
|
|
|
; Add of a vector loaded from %j.ptr, selected per lane by (%mask1 != 0) and
; keeping %i in the other lanes. The expected assembly folds the load into a
; vpaddd predicated on %k1.
; The function reads memory through %j.ptr, so it is marked readonly rather
; than readnone.
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readonly {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}
|
|
|
|
|
|
|
|
; Add of a constant with i32 1 in every lane, selected per lane by
; (%mask1 != 0) and keeping %i in the other lanes. The expected assembly is a
; vpaddd with a {1to16} broadcast operand, predicated on %k1.
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %lanes = icmp ne <16 x i32> %mask1, zeroinitializer
  %sum = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %res = select <16 x i1> %lanes, <16 x i32> %sum, <16 x i32> %i
  ret <16 x i32> %res
}
|
|
|
|
|
|
|
|
; Add of a vector loaded from %j.ptr, selected per lane by (%mask1 != 0) with
; zero in the other lanes. The expected assembly folds the load into a vpaddd
; predicated on %k1 with the {z} modifier.
; The function reads memory through %j.ptr, so it is marked readonly rather
; than readnone.
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readonly {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}
|
|
|
|
|
|
|
|
; Add of a constant with i32 1 in every lane, selected per lane by
; (%mask1 != 0) with zero in the other lanes. The expected assembly is a
; vpaddd with a {1to16} broadcast operand, predicated on %k1 with {z}.
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %lanes = icmp ne <16 x i32> %mask1, zeroinitializer
  %sum = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %res = select <16 x i1> %lanes, <16 x i32> %sum, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}
|
|
|
|
|
2013-08-19 21:26:14 +08:00
|
|
|
; Register-register subtract of two <8 x i64> values; the expected assembly is
; a single vpsubq on zmm registers.
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %diff = sub <8 x i64> %i, %j
  ret <8 x i64> %diff
}
|
|
|
|
|
|
|
|
; Register-register subtract of two <16 x i32> values; the expected assembly
; is a single vpsubd on zmm registers.
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %diff = sub <16 x i32> %i, %j
  ret <16 x i32> %diff
}
|
|
|
|
|
|
|
|
; Register-register multiply of two <16 x i32> values; the expected assembly
; is a single vpmulld on zmm registers.
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %product = mul <16 x i32> %i, %j
  ret <16 x i32> %product
}
|
|
|
|
|
2013-08-28 19:21:58 +08:00
|
|
|
; Tail call to the readnone-declared sqrtf on a float; the expected assembly
; is an inline vsqrtss followed by the return.
declare float @sqrtf(float) readnone

define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %root = tail call float @sqrtf(float %a) nounwind readnone
  ret float %root
}
|
|
|
|
|
|
|
|
; Tail call to the readnone-declared sqrt on a double; the expected assembly
; is an inline vsqrtsd followed by the return.
declare double @sqrt(double) readnone

define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %root = tail call double @sqrt(double %a) nounwind readnone
  ret double %root
}
|
|
|
|
|
|
|
|
; Scalar float square root through the llvm.sqrt.f32 intrinsic; the expected
; assembly is a single vsqrtss.
declare float @llvm.sqrt.f32(float)

define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %root = call float @llvm.sqrt.f32(float %a)
  ret float %root
}
|
|
|
|
|
2014-02-19 23:16:09 +08:00
|
|
|
; <16 x float> square root through the llvm.sqrt.v16f32 intrinsic; the
; expected assembly is a single vsqrtps on a zmm register.
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)

define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtps %zmm0, %zmm0
; CHECK-NEXT: retq
  %root = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %root
}
|
|
|
|
|
|
|
|
; <8 x double> square root through the llvm.sqrt.v8f64 intrinsic; the expected
; assembly is a single vsqrtpd on a zmm register.
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)

define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
; CHECK-NEXT: retq
  %root = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %root
}
|
|
|
|
|
2013-08-28 19:21:58 +08:00
|
|
|
; Floating-point add of a constant that has the same value in every lane; the
; expected assembly uses a {1to16} broadcast memory operand on vaddps.
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %sum = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %sum
}
|
|
|
|
|
2013-08-19 21:26:14 +08:00
|
|
|
; Integer add of a constant with i64 2 in every lane; the expected assembly
; uses a {1to8} broadcast memory operand on vpaddq.
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %sum = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %sum
}
|
|
|
|
|
|
|
|
; Bitwise OR with a constant that has i64 2 in every lane; the expected
; assembly uses a {1to8} broadcast memory operand on vporq.
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: orq_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %bits = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %bits
}
|
|
|
|
|
|
|
|
; Bitwise AND with a vector loaded from %x; the load is expected to be folded
; into the memory operand of vpandd.
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; CHECK-LABEL: andd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %mem = load <16 x i32>, <16 x i32>* %x, align 4
  %masked = and <16 x i32> %y, %mem
  ret <16 x i32> %masked
}
|
|
|
|
|
|
|
|
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
|
2014-10-03 08:44:46 +08:00
|
|
|
; CHECK-LABEL: andqbrst:
|
|
|
|
; CHECK: ## BB#0: ## %entry
|
[AVX512] Bring back vector-shuffle lowering support through broadcasts
After the commit at rev219046, lowering of 512-bit broadcasts became non-optimal. Most of the tests on broadcasting and embedded broadcasting were changed, and they no longer produce efficient code.
The example below is taken from that commit's changes (it is the first test in test/CodeGen/X86/avx512-vbroadcast.ll):
define <16 x i32> @_inreg16xi32(i32 %a) {
; CHECK-LABEL: _inreg16xi32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastd %edi, %zmm0
+; CHECK-NEXT: vmovd %edi, %xmm0
+; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
+; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT: retq
%b = insertelement <16 x i32> undef, i32 %a, i32 0
%c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
ret <16 x i32> %c
}
Here, a 256-bit broadcast was generated instead of a 512-bit one.
In this patch
1) I added vector-shuffle lowering through broadcasts
2) Removed asserts and branches like the following, because they are incorrect:
- assert(Subtarget->hasDQI() && "We can only lower v8i64 with AVX-512-DQI");
3) Fixed lowering tests
llvm-svn: 220774
2014-10-28 20:28:51 +08:00
|
|
|
; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
|
2014-10-03 08:44:46 +08:00
|
|
|
; CHECK-NEXT: retq
|
2013-08-19 21:26:14 +08:00
|
|
|
entry:
|
2015-02-28 05:17:42 +08:00
|
|
|
%a = load i64, i64* %ap, align 8
|
2013-08-19 21:26:14 +08:00
|
|
|
%b = insertelement <8 x i64> undef, i64 %a, i32 0
|
|
|
|
%c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
|
|
|
|
%d = and <8 x i64> %p1, %c
|
|
|
|
ret <8 x i64>%d
|
2013-10-21 21:27:34 +08:00
|
|
|
}
|
2014-12-18 20:28:22 +08:00
|
|
|
|
|
|
|
; Floating-point add of %i and %j, selected per lane by (%mask1 != 0) and
; keeping %dst in the other lanes. The expected assembly is a vaddps into
; %zmm0 predicated on %k1.
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vaddps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %lanes = icmp ne <16 x i32> %mask1, zeroinitializer
  %sum = fadd <16 x float> %i, %j
  %res = select <16 x i1> %lanes, <16 x float> %sum, <16 x float> %dst
  ret <16 x float> %res
}
|
|
|
|
|
|
|
|
; Floating-point multiply of %i and %j, selected per lane by (%mask1 != 0) and
; keeping %dst in the other lanes. The expected assembly is a vmulps into
; %zmm0 predicated on %k1.
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmulps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %lanes = icmp ne <16 x i32> %mask1, zeroinitializer
  %product = fmul <16 x float> %i, %j
  %res = select <16 x i1> %lanes, <16 x float> %product, <16 x float> %dst
  ret <16 x float> %res
}
|
|
|
|
|
|
|
|
; Per-lane minimum written as fcmp olt + select, then selected by
; (%mask1 != 0) with %dst in the other lanes. The expected assembly is a
; vminps into %zmm0 predicated on %k1.
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vminps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %lanes = icmp ne <16 x i32> %mask1, zeroinitializer
  %is_less = fcmp olt <16 x float> %i, %j
  %smaller = select <16 x i1> %is_less, <16 x float> %i, <16 x float> %j
  %res = select <16 x i1> %lanes, <16 x float> %smaller, <16 x float> %dst
  ret <16 x float> %res
}
|
|
|
|
|
|
|
|
; Per-lane minimum written as fcmp olt + select on <8 x double>, then selected
; by an <8 x i32> mask compared against zero, with %dst in the other lanes.
; Every run expects a vminpd predicated on %k1; the runs differ only in the
; register width used for the mask compare.
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX: ## BB#0:
; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
  %lanes = icmp ne <8 x i32> %mask1, zeroinitializer
  %is_less = fcmp olt <8 x double> %i, %j
  %smaller = select <8 x i1> %is_less, <8 x double> %i, <8 x double> %j
  %res = select <8 x i1> %lanes, <8 x double> %smaller, <8 x double> %dst
  ret <8 x double> %res
}
|
|
|
|
|
|
|
|
; Per-lane maximum written as fcmp ogt + select, then selected by
; (%mask1 != 0) with %dst in the other lanes. The expected assembly is a
; vmaxps into %zmm0 predicated on %k1.
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmaxps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %lanes = icmp ne <16 x i32> %mask1, zeroinitializer
  %is_greater = fcmp ogt <16 x float> %i, %j
  %larger = select <16 x i1> %is_greater, <16 x float> %i, <16 x float> %j
  %res = select <16 x i1> %lanes, <16 x float> %larger, <16 x float> %dst
  ret <16 x float> %res
}
|
|
|
|
|
|
|
|
; Per-lane maximum written as fcmp ogt + select on <8 x double>, then selected
; by an <8 x i32> mask compared against zero, with %dst in the other lanes.
; Every run expects a vmaxpd predicated on %k1; the runs differ only in the
; register width used for the mask compare.
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX: ## BB#0:
; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
  %lanes = icmp ne <8 x i32> %mask1, zeroinitializer
  %is_greater = fcmp ogt <8 x double> %i, %j
  %larger = select <8 x i1> %is_greater, <8 x double> %i, <8 x double> %j
  %res = select <8 x i1> %lanes, <8 x double> %larger, <8 x double> %dst
  ret <8 x double> %res
}
|
|
|
|
|
|
|
|
; Merge-masked subtraction on <16 x float>: lanes whose %mask1 element is
; non-zero receive %i - %j, the remaining lanes keep %dst. The expected code
; zeroes a register, compares the mask against it into %k1, and issues a
; single vsubps predicated on %k1 that writes into the %dst register.
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
|
2015-12-01 20:35:03 +08:00
|
|
|
; CHECK-LABEL: test_mask_vsubps:
|
|
|
|
; CHECK: ## BB#0:
|
|
|
|
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
|
|
|
|
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
|
|
|
|
; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
|
|
|
|
; CHECK-NEXT: retq
|
2014-12-18 20:28:22 +08:00
|
|
|
<16 x float> %j, <16 x i32> %mask1)
|
|
|
|
nounwind readnone {
|
|
|
|
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
|
|
|
|
%x = fsub <16 x float> %i, %j
|
|
|
|
%r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
|
|
|
|
ret <16 x float> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Merge-masked division on <16 x float>: lanes whose %mask1 element is
; non-zero receive %i / %j, the remaining lanes keep %dst. The expected code
; builds %k1 from a compare of the mask against a zeroed register and then
; issues a single vdivps predicated on %k1.
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
|
2015-12-01 20:35:03 +08:00
|
|
|
; CHECK-LABEL: test_mask_vdivps:
|
|
|
|
; CHECK: ## BB#0:
|
|
|
|
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
|
|
|
|
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
|
|
|
|
; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1}
|
|
|
|
; CHECK-NEXT: retq
|
2014-12-18 20:28:22 +08:00
|
|
|
<16 x float> %j, <16 x i32> %mask1)
|
|
|
|
nounwind readnone {
|
|
|
|
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
|
|
|
|
%x = fdiv <16 x float> %i, %j
|
|
|
|
%r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
|
|
|
|
ret <16 x float> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Merge-masked addition on <8 x double>: lanes whose <8 x i64> %mask1 element
; is non-zero receive %i + %j, the remaining lanes keep %dst. Because the mask
; elements are 64-bit, the expected compare is the quadword form (vpcmpneqq),
; followed by a single vaddpd predicated on %k1.
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
|
2015-12-01 20:35:03 +08:00
|
|
|
; CHECK-LABEL: test_mask_vaddpd:
|
|
|
|
; CHECK: ## BB#0:
|
|
|
|
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
|
|
|
|
; CHECK-NEXT: vpcmpneqq %zmm4, %zmm3, %k1
|
|
|
|
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1}
|
|
|
|
; CHECK-NEXT: retq
|
2014-12-18 20:28:22 +08:00
|
|
|
<8 x double> %j, <8 x i64> %mask1)
|
|
|
|
nounwind readnone {
|
|
|
|
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
|
|
|
|
%x = fadd <8 x double> %i, %j
|
|
|
|
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
|
|
|
|
ret <8 x double> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Zero-masked addition on <8 x double>: lanes whose %mask1 element is non-zero
; receive %i + %j, all other lanes are zeroinitializer. The select against
; zero is expected to become the {z} (zeroing) form of a %k1-predicated
; vaddpd rather than a separate blend.
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
|
2015-12-01 20:35:03 +08:00
|
|
|
; CHECK-LABEL: test_maskz_vaddpd:
|
|
|
|
; CHECK: ## BB#0:
|
|
|
|
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
|
|
|
|
; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
|
|
|
|
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
|
|
|
|
; CHECK-NEXT: retq
|
2014-12-18 20:28:22 +08:00
|
|
|
<8 x i64> %mask1) nounwind readnone {
|
|
|
|
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
|
|
|
|
%x = fadd <8 x double> %i, %j
|
|
|
|
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
|
|
|
|
ret <8 x double> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Merge-masked addition with a folded load: the second addend is loaded from
; %j (align 8) instead of being passed in a register. The expected code has no
; separate load instruction; the memory operand (%rdi) appears directly in the
; %k1-predicated vaddpd, and unselected lanes keep %dst.
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
|
2015-12-01 20:35:03 +08:00
|
|
|
; CHECK-LABEL: test_mask_fold_vaddpd:
|
|
|
|
; CHECK: ## BB#0:
|
|
|
|
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
|
|
|
|
; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
|
|
|
|
; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1}
|
|
|
|
; CHECK-NEXT: retq
|
2014-12-18 20:28:22 +08:00
|
|
|
<8 x double>* %j, <8 x i64> %mask1)
|
|
|
|
nounwind {
|
|
|
|
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
|
2015-02-28 05:17:42 +08:00
|
|
|
%tmp = load <8 x double>, <8 x double>* %j, align 8
|
2014-12-18 20:28:22 +08:00
|
|
|
%x = fadd <8 x double> %i, %tmp
|
|
|
|
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
|
|
|
|
ret <8 x double> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Zero-masked addition with a folded load: the second addend is loaded from
; %j (align 8) and unselected lanes are zeroinitializer. The expected code
; folds the load into the memory operand (%rdi) of a vaddpd that carries both
; the %k1 predicate and the {z} zeroing modifier.
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
|
2015-12-01 20:35:03 +08:00
|
|
|
; CHECK-LABEL: test_maskz_fold_vaddpd:
|
|
|
|
; CHECK: ## BB#0:
|
|
|
|
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
|
|
|
|
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
|
|
|
|
; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
|
|
|
|
; CHECK-NEXT: retq
|
2014-12-18 20:28:22 +08:00
|
|
|
<8 x i64> %mask1) nounwind {
|
|
|
|
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
|
2015-02-28 05:17:42 +08:00
|
|
|
%tmp = load <8 x double>, <8 x double>* %j, align 8
|
2014-12-18 20:28:22 +08:00
|
|
|
%x = fadd <8 x double> %i, %tmp
|
|
|
|
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
|
|
|
|
ret <8 x double> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unmasked addition with an embedded broadcast: a scalar double is loaded from
; %j, placed in lane 0 with insertelement, and splatted to all eight lanes by
; a shufflevector with an all-zero index mask. The expected code folds the
; whole load + splat sequence into the {1to8} memory operand of one vaddpd.
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
|
2015-12-01 20:35:03 +08:00
|
|
|
; CHECK-LABEL: test_broadcast_vaddpd:
|
|
|
|
; CHECK: ## BB#0:
|
|
|
|
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0
|
|
|
|
; CHECK-NEXT: retq
|
2015-02-28 05:17:42 +08:00
|
|
|
%tmp = load double, double* %j
|
2014-12-18 20:28:22 +08:00
|
|
|
%b = insertelement <8 x double> undef, double %tmp, i32 0
|
|
|
|
%c = shufflevector <8 x double> %b, <8 x double> undef,
|
|
|
|
<8 x i32> zeroinitializer
|
|
|
|
%x = fadd <8 x double> %c, %i
|
|
|
|
ret <8 x double> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
; Merge-masked addition with an embedded broadcast: a scalar double loaded
; from %j is splatted to eight lanes and added to %i; lanes whose %mask1
; element is zero keep the pass-through value of the final select.
; NOTE(review): the pass-through operand of that select is %i, not %dst, and
; %dst is never read in the body. The expected code matches this: the register
; holding %dst (zmm0) is reused as the zero vector, the masked add merges into
; zmm1, and a vmovaps copies the result to the return register. It is unclear
; from this file whether using %i was intentional; confirm before changing,
; since the assertions would need regenerating.
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
|
2015-12-01 20:35:03 +08:00
|
|
|
; CHECK-LABEL: test_mask_broadcast_vaddpd:
|
|
|
|
; CHECK: ## BB#0:
|
|
|
|
; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
|
|
|
|
; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1
|
|
|
|
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
|
|
|
|
; CHECK-NEXT: vmovaps %zmm1, %zmm0
|
|
|
|
; CHECK-NEXT: retq
|
2014-12-18 20:28:22 +08:00
|
|
|
double* %j, <8 x i64> %mask1) nounwind {
|
|
|
|
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
|
2015-02-28 05:17:42 +08:00
|
|
|
%tmp = load double, double* %j
|
2014-12-18 20:28:22 +08:00
|
|
|
%b = insertelement <8 x double> undef, double %tmp, i32 0
|
|
|
|
%c = shufflevector <8 x double> %b, <8 x double> undef,
|
|
|
|
<8 x i32> zeroinitializer
|
|
|
|
%x = fadd <8 x double> %c, %i
|
|
|
|
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
|
|
|
|
ret <8 x double> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Zero-masked addition with an embedded broadcast: a scalar double loaded from
; %j is splatted to eight lanes and added to %i; lanes whose %mask1 element is
; zero become zeroinitializer. The expected code is one vaddpd combining the
; {1to8} broadcast memory operand, the %k1 predicate and the {z} modifier.
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
|
2015-12-01 20:35:03 +08:00
|
|
|
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
|
|
|
|
; CHECK: ## BB#0:
|
|
|
|
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
|
|
|
|
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
|
|
|
|
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
|
|
|
|
; CHECK-NEXT: retq
|
2014-12-18 20:28:22 +08:00
|
|
|
<8 x i64> %mask1) nounwind {
|
|
|
|
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
|
2015-02-28 05:17:42 +08:00
|
|
|
%tmp = load double, double* %j
|
2014-12-18 20:28:22 +08:00
|
|
|
%b = insertelement <8 x double> undef, double %tmp, i32 0
|
|
|
|
%c = shufflevector <8 x double> %b, <8 x double> undef,
|
|
|
|
<8 x i32> zeroinitializer
|
|
|
|
%x = fadd <8 x double> %c, %i
|
|
|
|
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
|
|
|
|
ret <8 x double> %r
|
|
|
|
}
|
2015-09-13 16:15:15 +08:00
|
|
|
|
|
|
|
; Negation of <16 x float>, written as fsub of %a from a splat of -0.0. Every
; prefix expects a single XOR of %zmm0 with a RIP-relative constant operand
; (presumably the sign-bit mask; the constant itself is not checked). The
; opcode depends on the feature set: the F, VL and BW prefixes expect the
; integer form vpxord, while the DQ and SKX prefixes expect vxorps.
define <16 x float> @test_fxor(<16 x float> %a) {
|
2015-12-01 20:35:03 +08:00
|
|
|
; AVX512F-LABEL: test_fxor:
|
|
|
|
; AVX512F: ## BB#0:
|
|
|
|
; AVX512F-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: test_fxor:
|
|
|
|
; AVX512VL: ## BB#0:
|
|
|
|
; AVX512VL-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512BW-LABEL: test_fxor:
|
|
|
|
; AVX512BW: ## BB#0:
|
|
|
|
; AVX512BW-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; AVX512BW-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: test_fxor:
|
|
|
|
; AVX512DQ: ## BB#0:
|
|
|
|
; AVX512DQ-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: test_fxor:
|
|
|
|
; SKX: ## BB#0:
|
|
|
|
; SKX-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; SKX-NEXT: retq
|
2015-09-13 16:15:15 +08:00
|
|
|
|
|
|
|
|
|
%res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
|
|
|
|
ret <16 x float>%res
|
|
|
|
}
|
|
|
|
|
2015-12-07 22:33:34 +08:00
|
|
|
; Negation of the 256-bit vector <8 x float>, written as fsub of %a from a
; splat of -0.0. Unlike the 512-bit test_fxor, the expectation is shared by
; all run lines: a single vxorps of %ymm0 with a RIP-relative constant.
define <8 x float> @test_fxor_8f32(<8 x float> %a) {
|
|
|
|
; CHECK-LABEL: test_fxor_8f32:
|
|
|
|
; CHECK: ## BB#0:
|
|
|
|
; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
%res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
|
|
|
|
ret <8 x float>%res
|
|
|
|
}
|
|
|
|
|
|
|
|
; Absolute value of <8 x double> through the llvm.fabs.v8f64 intrinsic. Every
; prefix expects a single AND of %zmm0 with a RIP-relative constant operand
; (presumably a mask clearing the sign bits; the constant is not checked).
; The F, VL and BW prefixes expect the integer form vpandq, while the DQ and
; SKX prefixes expect vandpd.
define <8 x double> @fabs_v8f64(<8 x double> %p)
|
|
|
|
; AVX512F-LABEL: fabs_v8f64:
|
|
|
|
; AVX512F: ## BB#0:
|
|
|
|
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: fabs_v8f64:
|
|
|
|
; AVX512VL: ## BB#0:
|
|
|
|
; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512BW-LABEL: fabs_v8f64:
|
|
|
|
; AVX512BW: ## BB#0:
|
|
|
|
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; AVX512BW-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: fabs_v8f64:
|
|
|
|
; AVX512DQ: ## BB#0:
|
|
|
|
; AVX512DQ-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: fabs_v8f64:
|
|
|
|
; SKX: ## BB#0:
|
|
|
|
; SKX-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
{
|
|
|
|
%t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
|
|
|
|
ret <8 x double> %t
|
|
|
|
}
|
|
|
|
; Declaration of the fabs intrinsic for <8 x double>, called by fabs_v8f64.
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
|
|
|
|
|
|
|
|
; Absolute value of <16 x float> through the llvm.fabs.v16f32 intrinsic. Every
; prefix expects a single AND of %zmm0 with a RIP-relative constant operand
; (presumably a mask clearing the sign bits; the constant is not checked).
; The F, VL and BW prefixes expect the integer form vpandd, while the DQ and
; SKX prefixes expect vandps.
define <16 x float> @fabs_v16f32(<16 x float> %p)
|
|
|
|
; AVX512F-LABEL: fabs_v16f32:
|
|
|
|
; AVX512F: ## BB#0:
|
|
|
|
; AVX512F-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: fabs_v16f32:
|
|
|
|
; AVX512VL: ## BB#0:
|
|
|
|
; AVX512VL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512BW-LABEL: fabs_v16f32:
|
|
|
|
; AVX512BW: ## BB#0:
|
|
|
|
; AVX512BW-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; AVX512BW-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: fabs_v16f32:
|
|
|
|
; AVX512DQ: ## BB#0:
|
|
|
|
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: fabs_v16f32:
|
|
|
|
; SKX: ## BB#0:
|
|
|
|
; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
{
|
|
|
|
%t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
|
|
|
|
ret <16 x float> %t
|
|
|
|
}
|
|
|
|
; Declaration of the fabs intrinsic for <16 x float>, called by fabs_v16f32.
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
|