; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s

define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dps:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i16 %mask to <16 x i1>
  %x = call <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, <16 x i1> %1, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.mask.scatter.dps.512(i8* %stbuf, <16 x i1> %1, <16 x i32> %ind2, <16 x float> %x, i32 4)
  ret void
}

define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dpd:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %mask to <8 x i1>
  %x = call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512(<8 x double> %src, i8* %base, <8 x i32> %ind, <8 x i1> %1, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.mask.scatter.dpd.512(i8* %stbuf, <8 x i1> %1, <8 x i32> %ind2, <8 x double> %x, i32 4)
  ret void
}

define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qps:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %mask to <8 x i1>
  %x = call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %stbuf, <8 x i1> %1, <8 x i64> %ind2, <8 x float> %x, i32 4)
  ret void
}

define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qpd:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %mask to <8 x i1>
  %x = call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512(<8 x double> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.mask.scatter.qpd.512(i8* %stbuf, <8 x i1> %1, <8 x i64> %ind2, <8 x double> %x, i32 4)
  ret void
}

;;
;; Integer Gather/Scatter
;;

define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dd:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i16 %mask to <16 x i1>
  %x = call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512(<16 x i32> %src, i8* %base, <16 x i32> %ind, <16 x i1> %1, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.mask.scatter.dpi.512(i8* %stbuf, <16 x i1> %1, <16 x i32> %ind2, <16 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qd:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %mask to <8 x i1>
  %x = call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512(<8 x i32> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.mask.scatter.qpi.512(i8* %stbuf, <8 x i1> %1, <8 x i64> %ind2, <8 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qq:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %mask to <8 x i1>
  %x = call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512(<8 x i64> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.mask.scatter.qpq.512(i8* %stbuf, <8 x i1> %1, <8 x i64> %ind2, <8 x i64> %x, i32 4)
  ret void
}

define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dq:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %mask to <8 x i1>
  %x = call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512(<8 x i64> %src, i8* %base, <8 x i32> %ind, <8 x i1> %1, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.mask.scatter.dpq.512(i8* %stbuf, <8 x i1> %1, <8 x i32> %ind2, <8 x i64> %x, i32 4)
  ret void
}

define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_dpd_execdomain:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, (%rdx)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %mask to <8 x i1>
  %x = call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512(<8 x double> %src, i8* %base, <8 x i32> %ind, <8 x i1> %1, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_qpd_execdomain:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, (%rdx)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %mask to <8 x i1>
  %x = call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512(<8 x double> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_dps_execdomain:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %1 = bitcast i16 %mask to <16 x i1>
  %res = call <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, <16 x i1> %1, i32 4)
  ret <16 x float> %res
}

define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_qps_execdomain:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %mask to <8 x i1>
  %res = call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
  ret <8 x float> %res
}

define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dpd_execdomain:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %zmm1
; CHECK-NEXT: vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %mask to <8 x i1>
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.mask.scatter.dpd.512(i8* %stbuf, <8 x i1> %1, <8 x i32>%ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qpd_execdomain:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %zmm1
; CHECK-NEXT: vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %mask to <8 x i1>
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.mask.scatter.qpd.512(i8* %stbuf, <8 x i1> %1, <8 x i64>%ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dps_execdomain:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %zmm1
; CHECK-NEXT: vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i16 %mask to <16 x i1>
  %x = load <16 x float>, <16 x float>* %src, align 64
  call void @llvm.x86.avx512.mask.scatter.dps.512(i8* %stbuf, <16 x i1> %1, <16 x i32>%ind, <16 x float> %x, i32 4)
  ret void
}

define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qps_execdomain:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %mask to <8 x i1>
  %x = load <8 x float>, <8 x float>* %src, align 32
  call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %stbuf, <8 x i1> %1, <8 x i64>%ind, <8 x float> %x, i32 4)
  ret void
}

define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_qps:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %stbuf, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> %ind2, <8 x float> %x, i32 4)
  ret void
}

declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);

define void @prefetch(<8 x i64> %ind, i8* %base) {
; CHECK-LABEL: prefetch:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: movb $120, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 3)
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, i8* %base, i32 4, i32 2)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, i8* %base, i32 2, i32 3)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, i8* %base, i32 2, i32 2)
  ret void
}

define <2 x double> @test_int_x86_avx512_mask_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3div2_df:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,2), %xmm2 {%k1}
; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
  %res = call <2 x double> @llvm.x86.avx512.mask.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, <2 x i1> %extract, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, <2 x i1> <i1 true, i1 true>, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

define <2 x i64> @test_int_x86_avx512_mask_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3div2_di:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT: vpaddq %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
  %res = call <2 x i64> @llvm.x86.avx512.mask.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, <2 x i1> %extract1, i32 8)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, <2 x i1> %extract, i32 8)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

define <4 x double> @test_int_x86_avx512_mask_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3div4_df:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,2), %ymm2 {%k1}
; CHECK-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <4 x double> @llvm.x86.avx512.mask.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, <4 x i1> %extract, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

define <4 x i64> @test_int_x86_avx512_mask_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3div4_di:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <4 x i64> @llvm.x86.avx512.mask.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, <4 x i1> %extract, i32 8)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 8)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

define <4 x float> @test_int_x86_avx512_mask_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3div4_sf:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherqps (%rdi,%xmm1,2), %xmm2 {%k1}
; CHECK-NEXT: vaddps %xmm2, %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
  %res = call <4 x float> @llvm.x86.avx512.mask.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, <2 x i1> %extract, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, <2 x i1> <i1 true, i1 true>, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

define <4 x i32> @test_int_x86_avx512_mask_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3div4_si:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, <2 x i1> <i1 true, i1 true>, i32 4)
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, <2 x i1> %extract, i32 4)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

define <4 x float> @test_int_x86_avx512_mask_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3div8_sf:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherqps (%rdi,%ymm1,2), %xmm2 {%k1}
; CHECK-NEXT: vaddps %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <4 x float> @llvm.x86.avx512.mask.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, <4 x i1> %extract, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

define <4 x i32> @test_int_x86_avx512_mask_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3div8_si:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa %xmm0, %xmm2
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <4 x i32> @llvm.x86.avx512.mask.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, <4 x i1> %extract1, i32 4)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, <4 x i1> %extract, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

define <2 x double> @test_int_x86_avx512_mask_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3siv2_df:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,2), %xmm2 {%k1}
; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
  %res = call <2 x double> @llvm.x86.avx512.mask.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, <2 x i1> %extract, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, <2 x i1> <i1 true, i1 true>, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

define <2 x i64> @test_int_x86_avx512_mask_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3siv2_di:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT: vpaddq %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
  %res = call <2 x i64> @llvm.x86.avx512.mask.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, <2 x i1> %extract1, i32 8)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, <2 x i1> %extract, i32 8)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

define <4 x double> @test_int_x86_avx512_mask_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3siv4_df:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,2), %ymm2 {%k1}
; CHECK-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <4 x double> @llvm.x86.avx512.mask.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, <4 x i1> %extract, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

define <4 x i64> @test_int_x86_avx512_mask_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3siv4_di:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
; CHECK-NEXT: vpaddq %ymm0, %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <4 x i64> @llvm.x86.avx512.mask.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, <4 x i1> %extract1, i32 8)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, <4 x i1> %extract, i32 8)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

define <4 x float> @test_int_x86_avx512_mask_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3siv4_sf:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherdps (%rdi,%xmm1,2), %xmm2 {%k1}
; CHECK-NEXT: vaddps %xmm2, %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <4 x float> @llvm.x86.avx512.mask.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, <4 x i1> %extract, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

define <4 x i32> @test_int_x86_avx512_mask_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3siv4_si:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
  %1 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, <4 x i1> %extract, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

define <8 x float> @test_int_x86_avx512_mask_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3siv8_sf:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,2), %ymm2 {%k1}
; CHECK-NEXT: vaddps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %res = call <8 x float> @llvm.x86.avx512.mask.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, <8 x i1> %1, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 2)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

define <8 x i32> @test_int_x86_avx512_mask_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3siv8_si:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa %ymm0, %ymm2
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %1 = bitcast i8 %x3 to <8 x i1>
  %res = call <8 x i32> @llvm.x86.avx512.mask.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, <8 x i1> %1, i32 4)
  %2 = bitcast i8 %x3 to <8 x i1>
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, <8 x i1> %2, i32 2)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  call void @llvm.x86.avx512.mask.scatterdiv2.df(i8* %x0, <2 x i1> <i1 true, i1 true>, <2 x i64> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scatterdiv2.df(i8* %x0, <2 x i1> %2, <2 x i64> %x2, <2 x double> %x3, i32 4)
  ret void
}

define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  call void @llvm.x86.avx512.mask.scatterdiv2.di(i8* %x0, <2 x i1> %2, <2 x i64> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scatterdiv2.di(i8* %x0, <2 x i1> <i1 true, i1 true>, <2 x i64> %x2, <2 x i64> %x3, i32 4)
  ret void
}

define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.mask.scatterdiv4.df(i8* %x0, <4 x i1> %2, <4 x i64> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scatterdiv4.df(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> %x2, <4 x double> %x3, i32 4)
  ret void
}

define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.mask.scatterdiv4.di(i8* %x0, <4 x i1> %2, <4 x i64> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scatterdiv4.di(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> %x2, <4 x i64> %x3, i32 4)
  ret void
}

define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  call void @llvm.x86.avx512.mask.scatterdiv4.sf(i8* %x0, <2 x i1> %2, <2 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scatterdiv4.sf(i8* %x0, <2 x i1> <i1 true, i1 true>, <2 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

define void @test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  call void @llvm.x86.avx512.mask.scatterdiv4.si(i8* %x0, <2 x i1> <i1 true, i1 true>, <2 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scatterdiv4.si(i8* %x0, <2 x i1> %2, <2 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}

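; The scatterdiv8 forms index with a 256-bit (%ymm) vector, so these functions
; end with vzeroupper before returning.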
define void @test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.mask.scatterdiv8.sf(i8* %x0, <4 x i1> %2, <4 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scatterdiv8.sf(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

define void @test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.mask.scatterdiv8.si(i8* %x0, <4 x i1> %2, <4 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scatterdiv8.si(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}

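; The scattersiv* tests mirror the scatterdiv* ones above, but index with dword
; (<4 x i32> / <8 x i32>) vectors instead of qword vectors.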
define void @test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  call void @llvm.x86.avx512.mask.scattersiv2.df(i8* %x0, <2 x i1> <i1 true, i1 true>, <4 x i32> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scattersiv2.df(i8* %x0, <2 x i1> %2, <4 x i32> %x2, <2 x double> %x3, i32 4)
  ret void
}

define void @test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  call void @llvm.x86.avx512.mask.scattersiv2.di(i8* %x0, <2 x i1> <i1 true, i1 true>, <4 x i32> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scattersiv2.di(i8* %x0, <2 x i1> %2, <4 x i32> %x2, <2 x i64> %x3, i32 4)
  ret void
}

define void @test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.mask.scattersiv4.df(i8* %x0, <4 x i1> %2, <4 x i32> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scattersiv4.df(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %x2, <4 x double> %x3, i32 4)
  ret void
}

define void @test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.mask.scattersiv4.di(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scattersiv4.di(i8* %x0, <4 x i1> %2, <4 x i32> %x2, <4 x i64> %x3, i32 4)
  ret void
}

define void @test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.mask.scattersiv4.sf(i8* %x0, <4 x i1> %2, <4 x i32> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scattersiv4.sf(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %x2, <4 x float> %x3, i32 4)
  ret void
}

define void @test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.mask.scattersiv4.si(i8* %x0, <4 x i1> %2, <4 x i32> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scattersiv4.si(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %x2, <4 x i32> %x3, i32 4)
  ret void
}

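; The 8-element siv forms take the full <8 x i1> mask directly, so no
; shufflevector extraction of a subvector is needed.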
define void @test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  call void @llvm.x86.avx512.mask.scattersiv8.sf(i8* %x0, <8 x i1> %1, <8 x i32> %x2, <8 x float> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scattersiv8.sf(i8* %x0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> %x2, <8 x float> %x3, i32 4)
  ret void
}

define void @test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = bitcast i8 %x1 to <8 x i1>
  call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> %1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> %x2, <8 x i32> %x3, i32 4)
  ret void
}

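; scatter_mask_test uses only constant masks: all-ones is expected to lower to
; kxnorw %k0, %k0, %k1, all-zeroes to kxorw %k0, %k0, %k1 (the store is still
; emitted), and the arbitrary constants 1 and 96 are loaded with movb + kmovd.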
define void @scatter_mask_test(i8* %x0, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: scatter_mask_test:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: movb $96, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> %x2, <8 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> zeroinitializer, <8 x i32> %x2, <8 x i32> %x3, i32 4)
  call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> bitcast (<1 x i8> <i8 1> to <8 x i1>), <8 x i32> %x2, <8 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> bitcast (<1 x i8> <i8 96> to <8 x i1>), <8 x i32> %x2, <8 x i32> %x3, i32 4)
  ret void
}

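; gather_mask_test is the gather counterpart. Under an all-ones mask the
; pass-through value is irrelevant, so a zeroed register (vxorps) is used
; instead of copying %src; the remaining gathers keep %zmm1 (the %src operand)
; as their pass-through, the constants 1 and 220 are loaded with movw + kmovd,
; and all four results are added so that every gather's value is used.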
define <16 x float> @gather_mask_test(<16 x i32> %ind, <16 x float> %src, i8* %base) {
; CHECK-LABEL: gather_mask_test:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm2 {%k1}
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm3
; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm3 {%k1}
; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm2
; CHECK-NEXT: movw $1, %ax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm3
; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm3 {%k1}
; CHECK-NEXT: movw $220, %ax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm2, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, <16 x i1> zeroinitializer, i32 4)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, <16 x i1> bitcast (<1 x i16> <i16 1> to <16 x i1>), i32 4)
  %res3 = call <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, <16 x i1> bitcast (<1 x i16> <i16 220> to <16 x i1>), i32 4)
  %res4 = fadd <16 x float> %res, %res1
  %res5 = fadd <16 x float> %res3, %res2
  %res6 = fadd <16 x float> %res5, %res4
  ret <16 x float> %res6
}

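; gather_global gathers through the global array @x defined below; the symbol
; folds directly into the addressing mode, giving "vgatherqps x(,%zmm0,4)".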
@x = global [1024 x float] zeroinitializer, align 16

define <8 x float> @gather_global(<8 x i64>, i32* nocapture readnone) {
; CHECK-LABEL: gather_global:
; CHECK: # %bb.0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vgatherqps x(,%zmm0,4), %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %3 = tail call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> zeroinitializer, i8* bitcast ([1024 x float]* @x to i8*), <8 x i64> %0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
  ret <8 x float> %3
}

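; Intrinsic declarations used by the tests in this file.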
declare <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float>, i8*, <16 x i32>, <16 x i1>, i32)
declare <8 x double> @llvm.x86.avx512.mask.gather.dpd.512(<8 x double>, i8*, <8 x i32>, <8 x i1>, i32)
declare <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float>, i8*, <8 x i64>, <8 x i1>, i32)
declare <8 x double> @llvm.x86.avx512.mask.gather.qpd.512(<8 x double>, i8*, <8 x i64>, <8 x i1>, i32)
declare <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512(<16 x i32>, i8*, <16 x i32>, <16 x i1>, i32)
declare <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512(<8 x i64>, i8*, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512(<8 x i32>, i8*, <8 x i64>, <8 x i1>, i32)
declare <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512(<8 x i64>, i8*, <8 x i64>, <8 x i1>, i32)
declare <2 x double> @llvm.x86.avx512.mask.gather3div2.df(<2 x double>, i8*, <2 x i64>, <2 x i1>, i32)
declare <2 x i64> @llvm.x86.avx512.mask.gather3div2.di(<2 x i64>, i8*, <2 x i64>, <2 x i1>, i32)
declare <4 x double> @llvm.x86.avx512.mask.gather3div4.df(<4 x double>, i8*, <4 x i64>, <4 x i1>, i32)
declare <4 x i64> @llvm.x86.avx512.mask.gather3div4.di(<4 x i64>, i8*, <4 x i64>, <4 x i1>, i32)
declare <4 x float> @llvm.x86.avx512.mask.gather3div4.sf(<4 x float>, i8*, <2 x i64>, <2 x i1>, i32)
declare <4 x i32> @llvm.x86.avx512.mask.gather3div4.si(<4 x i32>, i8*, <2 x i64>, <2 x i1>, i32)
declare <4 x float> @llvm.x86.avx512.mask.gather3div8.sf(<4 x float>, i8*, <4 x i64>, <4 x i1>, i32)
declare <4 x i32> @llvm.x86.avx512.mask.gather3div8.si(<4 x i32>, i8*, <4 x i64>, <4 x i1>, i32)
declare <2 x double> @llvm.x86.avx512.mask.gather3siv2.df(<2 x double>, i8*, <4 x i32>, <2 x i1>, i32)
declare <2 x i64> @llvm.x86.avx512.mask.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, <2 x i1>, i32)
declare <4 x double> @llvm.x86.avx512.mask.gather3siv4.df(<4 x double>, i8*, <4 x i32>, <4 x i1>, i32)
declare <4 x i64> @llvm.x86.avx512.mask.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, <4 x i1>, i32)
declare <4 x float> @llvm.x86.avx512.mask.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, <4 x i1>, i32)
declare <4 x i32> @llvm.x86.avx512.mask.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, <4 x i1>, i32)
declare <8 x float> @llvm.x86.avx512.mask.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.x86.avx512.mask.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, <8 x i1>, i32)

declare void @llvm.x86.avx512.mask.scatter.dps.512(i8*, <16 x i1>, <16 x i32>, <16 x float>, i32)
declare void @llvm.x86.avx512.mask.scatter.dpd.512(i8*, <8 x i1>, <8 x i32>, <8 x double>, i32)
declare void @llvm.x86.avx512.mask.scatter.qps.512(i8*, <8 x i1>, <8 x i64>, <8 x float>, i32)
declare void @llvm.x86.avx512.mask.scatter.qpd.512(i8*, <8 x i1>, <8 x i64>, <8 x double>, i32)
declare void @llvm.x86.avx512.mask.scatter.dpi.512(i8*, <16 x i1>, <16 x i32>, <16 x i32>, i32)
declare void @llvm.x86.avx512.mask.scatter.dpq.512(i8*, <8 x i1>, <8 x i32>, <8 x i64>, i32)
declare void @llvm.x86.avx512.mask.scatter.qpi.512(i8*, <8 x i1>, <8 x i64>, <8 x i32>, i32)
declare void @llvm.x86.avx512.mask.scatter.qpq.512(i8*, <8 x i1>, <8 x i64>, <8 x i64>, i32)
declare void @llvm.x86.avx512.mask.scatterdiv2.df(i8*, <2 x i1>, <2 x i64>, <2 x double>, i32)
declare void @llvm.x86.avx512.mask.scatterdiv2.di(i8*, <2 x i1>, <2 x i64>, <2 x i64>, i32)
declare void @llvm.x86.avx512.mask.scatterdiv4.df(i8*, <4 x i1>, <4 x i64>, <4 x double>, i32)
declare void @llvm.x86.avx512.mask.scatterdiv4.di(i8*, <4 x i1>, <4 x i64>, <4 x i64>, i32)
declare void @llvm.x86.avx512.mask.scatterdiv4.sf(i8*, <2 x i1>, <2 x i64>, <4 x float>, i32)
declare void @llvm.x86.avx512.mask.scatterdiv4.si(i8*, <2 x i1>, <2 x i64>, <4 x i32>, i32)
declare void @llvm.x86.avx512.mask.scatterdiv8.sf(i8*, <4 x i1>, <4 x i64>, <4 x float>, i32)
declare void @llvm.x86.avx512.mask.scatterdiv8.si(i8*, <4 x i1>, <4 x i64>, <4 x i32>, i32)
declare void @llvm.x86.avx512.mask.scattersiv2.df(i8*, <2 x i1>, <4 x i32>, <2 x double>, i32)
declare void @llvm.x86.avx512.mask.scattersiv2.di(i8*, <2 x i1>, <4 x i32>, <2 x i64>, i32)
declare void @llvm.x86.avx512.mask.scattersiv4.df(i8*, <4 x i1>, <4 x i32>, <4 x double>, i32)
declare void @llvm.x86.avx512.mask.scattersiv4.di(i8*, <4 x i1>, <4 x i32>, <4 x i64>, i32)
declare void @llvm.x86.avx512.mask.scattersiv4.sf(i8*, <4 x i1>, <4 x i32>, <4 x float>, i32)
declare void @llvm.x86.avx512.mask.scattersiv4.si(i8*, <4 x i1>, <4 x i32>, <4 x i32>, i32)
declare void @llvm.x86.avx512.mask.scattersiv8.sf(i8*, <8 x i1>, <8 x i32>, <8 x float>, i32)
declare void @llvm.x86.avx512.mask.scattersiv8.si(i8*, <8 x i1>, <8 x i32>, <8 x i32>, i32)