; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

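; This file exercises basic AVX-512 arithmetic lowering: 512-bit packed FP
; add/sub/mul/div (including folded memory and broadcast operands), vXi64 and
; vXi32 integer add/sub/mul, scalar and packed square roots, bitwise ops with
; embedded broadcasts, and merge-/zero-masked variants driven by vptestmd.
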
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

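; Without AVX-512DQ there is no single vXi64 multiply instruction, so the
; multiply is expanded into vpmuludq/vpsrlq/vpsllq/vpaddq sequences; with
; AVX-512DQ (plus AVX-512VL for the 256/128-bit cases) it lowers to a single
; vpmullq, as the per-prefix checks below show.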
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm2
; AVX512F-NEXT: vpmuludq %zmm0, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT: vpaddq %zmm2, %zmm3, %zmm2
; AVX512F-NEXT: vpsllq $32, %zmm2, %zmm2
; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlq $32, %zmm1, %zmm2
; AVX512VL-NEXT: vpmuludq %zmm0, %zmm2, %zmm2
; AVX512VL-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
; AVX512VL-NEXT: vpsllq $32, %zmm2, %zmm2
; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm2
; AVX512BW-NEXT: vpmuludq %zmm0, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: imulq512:
; SKX: # %bb.0:
; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512F-LABEL: imulq256:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlq $32, %ymm1, %ymm2
; AVX512F-NEXT: vpmuludq %ymm0, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm3
; AVX512F-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
; AVX512F-NEXT: vpaddq %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpsllq $32, %ymm2, %ymm2
; AVX512F-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: imulq256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm2
; AVX512VL-NEXT: vpmuludq %ymm0, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm3
; AVX512VL-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
; AVX512VL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsllq $32, %ymm2, %ymm2
; AVX512VL-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: imulq256:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm2
; AVX512BW-NEXT: vpmuludq %ymm0, %ymm2, %ymm2
; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm3
; AVX512BW-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
; AVX512BW-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: imulq256:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: imulq256:
; SKX: # %bb.0:
; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0
; SKX-NEXT: retq
  %z = mul <4 x i64>%x, %y
  ret <4 x i64>%z
}

define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512F-LABEL: imulq128:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlq $32, %xmm1, %xmm2
; AVX512F-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm3
; AVX512F-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
; AVX512F-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512F-NEXT: vpsllq $32, %xmm2, %xmm2
; AVX512F-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: imulq128:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm2
; AVX512VL-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm3
; AVX512VL-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
; AVX512VL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512VL-NEXT: vpsllq $32, %xmm2, %xmm2
; AVX512VL-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: imulq128:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm2
; AVX512BW-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm3
; AVX512BW-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
; AVX512BW-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: imulq128:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: imulq128:
; SKX: # %bb.0:
; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0
; SKX-NEXT: retq
  %z = mul <2 x i64>%x, %y
  ret <2 x i64>%z
}

define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

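; The broadcast tests below check that a splatted constant or a splatted scalar
; load is folded into the instruction as an embedded-broadcast operand, e.g.
; {1to8} for v8i64 and {1to16} for v16i32.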
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i32> %x
}

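; The masked tests below check that an icmp-ne-zero mask feeding a select folds
; into vptestmd plus a merge-masked {%k1} or zero-masked {%k1} {z} arithmetic
; instruction.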
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtps %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; AVX512F-LABEL: orq_broadcast:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: orq_broadcast:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: orq_broadcast:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: orq_broadcast:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: orq_broadcast:
; SKX: # %bb.0:
; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; AVX512F-LABEL: andd512fold:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpandd (%rdi), %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: andd512fold:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpandd (%rdi), %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: andd512fold:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpandd (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: andd512fold:
; AVX512DQ: # %bb.0: # %entry
; AVX512DQ-NEXT: vandps (%rdi), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: andd512fold:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0
; SKX-NEXT: retq
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; AVX512F-LABEL: andqbrst:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: andqbrst:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: andqbrst:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: andqbrst:
; AVX512DQ: # %bb.0: # %entry
; AVX512DQ-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: andqbrst:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}

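; In the masked tests below, targets without AVX-512VL test the <8 x i32> mask
; with a zmm vptestmd (after extending the ymm mask register), while AVX-512VL
; and SKX use the ymm form directly, so those functions need per-prefix checks.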
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                      <16 x float> %j, <16 x i32> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                      <16 x float> %j, <16 x i32> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                      <16 x float> %j, <16 x i32> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestmd %ymm3, %ymm3, %k1
; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT: vptestmd %zmm3, %zmm3, %k1
; AVX512BW-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k1
; AVX512DQ-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1
; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
                                      <8 x double> %j, <8 x i32> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%cmp_res = fcmp ogt <16 x float> %i, %j
%max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
%r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
ret <16 x float> %r
}

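; Masked vmaxpd: same i32-mask handling as test_mask_vminpd above, with vmaxpd as
; the masked operation.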
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestmd %ymm3, %ymm3, %k1
; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT: vptestmd %zmm3, %zmm3, %k1
; AVX512BW-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k1
; AVX512DQ-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1
; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
<8 x double> %j, <8 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%cmp_res = fcmp ogt <8 x double> %i, %j
%max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
%r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
ret <8 x double> %r
}

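; Masked vsubps: the select on the i32 mask becomes a {%k1} merge-masked subtract.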
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = fsub <16 x float> %i, %j
%r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
ret <16 x float> %r
}

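; Masked vdivps: same pattern as above with fdiv.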
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vdivps:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = fdiv <16 x float> %i, %j
%r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
ret <16 x float> %r
}

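; Masked vaddpd with an i64 mask vector, so vptestmq (not vptestmd) produces %k1.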
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_vaddpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmq %zmm3, %zmm3, %k1
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
<8 x double> %j, <8 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%x = fadd <8 x double> %i, %j
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
ret <8 x double> %r
}

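; Zero-masked vaddpd: selecting against zeroinitializer uses the {z} (zeroing) form.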
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
<8 x i64> %mask1) nounwind readnone {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%x = fadd <8 x double> %i, %j
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
ret <8 x double> %r
}

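; Merge-masked vaddpd with the second source operand folded from memory.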
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_fold_vaddpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
<8 x double>* %j, <8 x i64> %mask1)
nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%tmp = load <8 x double>, <8 x double>* %j, align 8
%x = fadd <8 x double> %i, %tmp
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
ret <8 x double> %r
}

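; Zero-masked vaddpd with a folded memory operand.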
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
<8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%tmp = load <8 x double>, <8 x double>* %j, align 8
%x = fadd <8 x double> %i, %tmp
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
ret <8 x double> %r
}

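; A splatted scalar load folds into vaddpd as a {1to8} embedded-broadcast operand.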
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
%tmp = load double, double* %j
%b = insertelement <8 x double> undef, double %tmp, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef,
<8 x i32> zeroinitializer
%x = fadd <8 x double> %c, %i
ret <8 x double> %x
}

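; Merge-masked broadcast add. The pass-through value is %i, which is also a source
; operand, so %i is copied into the result register before the masked vaddpd merges
; into it.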
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
double* %j, <8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%tmp = load double, double* %j
%b = insertelement <8 x double> undef, double %tmp, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef,
<8 x i32> zeroinitializer
%x = fadd <8 x double> %c, %i
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
ret <8 x double> %r
}

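; Zero-masked variant of the broadcast add.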
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
<8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%tmp = load double, double* %j
%b = insertelement <8 x double> undef, double %tmp, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef,
<8 x i32> zeroinitializer
%x = fadd <8 x double> %c, %i
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
ret <8 x double> %r
}

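; fsub of %a from -0.0 is a sign flip, lowered to an XOR with a broadcast sign-bit
; mask: vpxord when only the integer logic ops are available, vxorps once AVX512DQ
; adds the FP forms.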
define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_fxor:
; SKX: # %bb.0:
; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: retq
%res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
ret <16 x float>%res
}

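; The same sign flip on a 256-bit vector: AVX512VL can use an embedded-broadcast
; vpxord (and SKX a vxorps); without VL the -0.0 splat is materialized in a register
; first.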
define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; AVX512F-LABEL: test_fxor_8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX512F-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor_8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxord {{.*}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor_8f32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX512BW-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor_8f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX512DQ-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_fxor_8f32:
; SKX: # %bb.0:
; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; SKX-NEXT: retq
%res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
ret <8 x float>%res
}

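; fabs lowers to an AND that clears the sign bits, with the constant folded as a
; {1to8} broadcast: vpandq without AVX512DQ, vandpd with it.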
define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: fabs_v8f64:
; SKX: # %bb.0:
; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
{
%t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
ret <8 x double> %t
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

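; Single-precision fabs: vpandd {1to16} without AVX512DQ, vandps with it.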
define <16 x float> @fabs_v16f32(<16 x float> %p)
; AVX512F-LABEL: fabs_v16f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v16f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v16f32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v16f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: fabs_v16f32:
; SKX: # %bb.0:
; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: retq
{
%t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
ret <16 x float> %t
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)