; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s

declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double>, i8*, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32)

declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)

define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
  ret void
}

define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dpd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
  ret void
}

define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
  ret void
}

define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qpd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
  ret void
}

;;
;; Integer Gather/Scatter
;;
declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, i16, <16 x i32>, <16 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64>, i8*, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, i8, <8 x i32>, <8 x i64>, i32)

declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)

define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qq:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
  ret void
}

define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
  ret void
}

define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_dpd_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, (%rdx)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_qpd_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, (%rdx)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_dps_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_qps_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  ret <8 x float> %res
}

define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dpd_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %zmm1
; CHECK-NEXT: vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qpd_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %zmm1
; CHECK-NEXT: vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dps_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %zmm1
; CHECK-NEXT: vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = load <16 x float>, <16 x float>* %src, align 64
  call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
  ret void
}

define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qps_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = load <8 x float>, <8 x float>* %src, align 32
  call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
  ret void
}

define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_qps:
; CHECK: ## BB#0:
; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
  ret void
}

declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);

define void @prefetch(<8 x i64> %ind, i8* %base) {
; CHECK-LABEL: prefetch:
; CHECK: ## BB#0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: movb $120, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 3)
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, i8* %base, i32 4, i32 2)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, i8* %base, i32 2, i32 3)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, i8* %base, i32 2, i32 2)
  ret void
}

declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)

define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,2), %xmm2 {%k1}
; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)

define <2 x i64>@test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT: vpaddq %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)

define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorpd %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,2), %ymm2 {%k1}
; CHECK-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)

define <4 x i64>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 8)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)

define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherqps (%rdi,%xmm1,2), %xmm2 {%k1}
; CHECK-NEXT: vaddps %xmm2, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)

define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherqps (%rdi,%ymm1,2), %xmm2 {%k1}
; CHECK-NEXT: vaddps %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa %xmm0, %xmm2
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)

define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,2), %xmm2 {%k1}
; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)

define <2 x i64>@test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT: vpaddq %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)

define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorpd %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,2), %ymm2 {%k1}
; CHECK-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)

define <4 x i64>@test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
; CHECK-NEXT: vpaddq %ymm0, %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)

define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vgatherdps (%rdi,%xmm1,2), %xmm2 {%k1}
; CHECK-NEXT: vaddps %xmm2, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)

define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,2), %ymm2 {%k1}
; CHECK-NEXT: vaddps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)

define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa %ymm0, %ymm2
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 2)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)

define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)

define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)

define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)

define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)

define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)

define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)

define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}
declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)

define void @test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 2)
call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
ret void
}
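
; scattersiv2.df: dword-indexed scatter of <2 x double> via vscatterdpd on xmm operands;
; constant and variable masks land in %k2 and %k1 respectively.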
declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)

define void @test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
ret void
}
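
; scattersiv2.di: dword-indexed scatter of <2 x i64>, expected to select vpscatterdq on xmm operands.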
declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)

define void @test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
ret void
}
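
; scattersiv4.df: dword-indexed scatter of <4 x double>; the ymm data operand also
; brings a trailing vzeroupper.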
declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)

define void @test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
ret void
}
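
; scattersiv4.di: dword-indexed scatter of <4 x i64> via vpscatterdq with ymm data and an xmm index.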
declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)

define void @test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
ret void
}
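
; scattersiv4.sf: dword-indexed scatter of <4 x float> via vscatterdps on xmm operands.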
declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)

define void @test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
ret void
}
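
; scattersiv4.si: dword-indexed scatter of <4 x i32> via vpscatterdd on xmm operands.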
declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)

define void @test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
ret void
}
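
; scattersiv8.sf: dword-indexed scatter of <8 x float> via vscatterdps on ymm operands, then vzeroupper.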
declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)

define void @test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
ret void
}
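
; scattersiv8.si: dword-indexed scatter of <8 x i32> via vpscatterdd on ymm operands, then vzeroupper.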
declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)

define void @test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
ret void
}
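
; Exercises several immediate mask values on scattersiv8.si: -1 becomes kxnorw (all ones),
; 0 becomes kxorw (all zeros), and 1 and 96 are loaded with movb+kmovd; each value still
; drives a masked vpscatterdd.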
define void @scatter_mask_test(i8* %x0, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: scatter_mask_test:
; CHECK: ## BB#0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT: movb $96, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 0, <8 x i32> %x2, <8 x i32> %x3, i32 4)
call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 96, <8 x i32> %x2, <8 x i32> %x3, i32 4)
ret void
}
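
; Gathers through gather.dps.512 with masks -1, 0, 1 and 220 and sums the results; %src
; (%zmm1) is copied to a scratch register before the gathers that would otherwise clobber
; it while it is still needed.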
define <16 x float> @gather_mask_test(<16 x i32> %ind, <16 x float> %src, i8* %base) {
; CHECK-LABEL: gather_mask_test:
; CHECK: ## BB#0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vxorps %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm2 {%k1}
; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm3
; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm3 {%k1}
; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm2
; CHECK-NEXT: movw $1, %ax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm3
; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm3 {%k1}
; CHECK-NEXT: movw $220, %ax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm2, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 -1, i32 4)
%res1 = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 0, i32 4)
%res2 = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 1, i32 4)
%res3 = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 220, i32 4)
%res4 = fadd <16 x float> %res, %res1
%res5 = fadd <16 x float> %res3, %res2
%res6 = fadd <16 x float> %res5, %res4
ret <16 x float> %res6
}