; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_64
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_32
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_SMALL
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq -code-model=large < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_LARGE
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX_32
; RUN: opt -mtriple=x86_64-apple-darwin -scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR
; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -mcpu=skx < %s -o /dev/null

@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16

; SCALAR-LABEL: test1
; SCALAR: extractelement <16 x float*>
; SCALAR-NEXT: load float
; SCALAR-NEXT: insertelement <16 x float>
; SCALAR-NEXT: extractelement <16 x float*>
; SCALAR-NEXT: load float
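
; test1: gather of 16 floats through a splat base pointer with an all-ones
; mask. The 32-bit indices are sign-extended (vpmovsxdq), so the checks below
; expect the v16 gather to be emitted as two vgatherqps halves that are
; recombined with vinsertf64x4.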
define <16 x float> @test1(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test1:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; Note: the all-ones masks in these tests are materialized with
; kxnorw %k0, %k0, %kn. %k0 is chosen as the (undef) input because KNL does
; not recognize dependency-breaking idioms for mask registers, and %k0, which
; cannot be used as a write mask, is assumed to be the least-used mask
; register (see http://reviews.llvm.org/D15739).
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: kxnorw %k0, %k0, %k2
; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test1:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: kxnorw %k0, %k0, %k2
; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test1:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kxnorw %k0, %k0, %k2
; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test1:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: kxnorw %k0, %k0, %k2
; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

%sext_ind = sext <16 x i32> %ind to <16 x i64>
%gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind

%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
ret <16 x float>%res
}

declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> )
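
; Signature note: masked.gather takes a vector of pointers, an alignment, a
; per-lane <N x i1> mask, and a passthru vector whose elements are returned
; in the masked-off lanes.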

; SCALAR-LABEL: test2
; SCALAR: extractelement <16 x float*>
; SCALAR-NEXT: load float
; SCALAR-NEXT: insertelement <16 x float>
; SCALAR-NEXT: br label %else
; SCALAR: else:
; SCALAR-NEXT: %res.phi.else = phi
; SCALAR-NEXT: %Mask1 = extractelement <16 x i1> %imask, i32 1
; SCALAR-NEXT: %ToLoad1 = icmp eq i1 %Mask1, true
; SCALAR-NEXT: br i1 %ToLoad1, label %cond.load1, label %else2
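
; test2: the same gather pattern with a real i16 mask operand. The mask moves
; through kmovw, and kshiftrw $8 peels off the upper half for the second
; vgatherqps.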
define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test2:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kmovw %esi, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test2:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT: kshiftrw $8, %k1, %k2
; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test2:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kmovw %esi, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test2:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

%sext_ind = sext <16 x i32> %ind to <16 x i64>
%gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
%imask = bitcast i16 %mask to <16 x i1>
%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef)
ret <16 x float> %res
}
define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test3:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kmovw %esi, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k2}
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm1,4), %ymm0 {%k1}
; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test3:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT: kshiftrw $8, %k1, %k2
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k2}
; KNL_32-NEXT: vpgatherqd (%eax,%zmm1,4), %ymm0 {%k1}
; KNL_32-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test3:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kmovw %esi, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k2}
; SKX-NEXT: vpgatherqd (%rdi,%zmm1,4), %ymm0 {%k1}
; SKX-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test3:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k2}
; SKX_32-NEXT: vpgatherqd (%eax,%zmm1,4), %ymm0 {%k1}
; SKX_32-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer

%sext_ind = sext <16 x i32> %ind to <16 x i64>
%gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i64> %sext_ind
%imask = bitcast i16 %mask to <16 x i1>
%res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
ret <16 x i32> %res
}
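
; test4: two gathers through the same pointers with the same mask. The mask is
; copied (kmovw %k1, %k2) before the first gather because a gather uses its
; mask register as a completion marker and clobbers it.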
define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test4:
; KNL_64: # BB#0:
; KNL_64-NEXT: kmovw %esi, %k1
; KNL_64-NEXT: kmovw %k1, %k2
; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2}
; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm2
; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
; KNL_64-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test4:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT: kmovw %k1, %k2
; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2}
; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2
; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1}
; KNL_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test4:
; SKX: # BB#0:
; SKX-NEXT: kmovw %esi, %k1
; SKX-NEXT: kmovw %k1, %k2
; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2}
; SKX-NEXT: vmovdqa64 %zmm1, %zmm2
; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test4:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT: kmovw %k1, %k2
; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2}
; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm2
; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1}
; SKX_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer

%gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
%imask = bitcast i16 %mask to <16 x i1>
%gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
%gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
%res = add <16 x i32> %gt1, %gt2
ret <16 x i32> %res
}

; SCALAR-LABEL: test5
; SCALAR: %Mask0 = extractelement <16 x i1> %imask, i32 0
; SCALAR-NEXT: %ToStore0 = icmp eq i1 %Mask0, true
; SCALAR-NEXT: br i1 %ToStore0, label %cond.store, label %else
; SCALAR: cond.store:
; SCALAR-NEXT: %Elt0 = extractelement <16 x i32> %val, i32 0
; SCALAR-NEXT: %Ptr0 = extractelement <16 x i32*> %gep.random, i32 0
; SCALAR-NEXT: store i32 %Elt0, i32* %Ptr0, align 4
; SCALAR-NEXT: br label %else
; SCALAR: else:
; SCALAR-NEXT: %Mask1 = extractelement <16 x i1> %imask, i32 1
; SCALAR-NEXT: %ToStore1 = icmp eq i1 %Mask1, true
; SCALAR-NEXT: br i1 %ToStore1, label %cond.store1, label %else2
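
; test5: the same masked scatter issued twice. As with the gathers above, the
; mask is duplicated with kmovw so each vpscatterdd consumes its own copy.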
define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
; KNL_64-LABEL: test5:
; KNL_64: # BB#0:
; KNL_64-NEXT: kmovw %esi, %k1
; KNL_64-NEXT: kmovw %k1, %k2
; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k2}
; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test5:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT: kmovw %k1, %k2
; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k2}
; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test5:
; SKX: # BB#0:
; SKX-NEXT: kmovw %esi, %k1
; SKX-NEXT: kmovw %k1, %k2
; SKX-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k2}
; SKX-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test5:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT: kmovw %k1, %k2
; SKX_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k2}
; SKX_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer

%gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
%imask = bitcast i16 %mask to <16 x i1>
call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
ret void
}

declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> )
declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> )

; SCALAR-LABEL: test6
; SCALAR: store i32 %Elt0, i32* %Ptr01, align 4
; SCALAR-NEXT: %Elt1 = extractelement <8 x i32> %a1, i32 1
; SCALAR-NEXT: %Ptr12 = extractelement <8 x i32*> %ptr, i32 1
; SCALAR-NEXT: store i32 %Elt1, i32* %Ptr12, align 4
; SCALAR-NEXT: %Elt2 = extractelement <8 x i32> %a1, i32 2
; SCALAR-NEXT: %Ptr23 = extractelement <8 x i32*> %ptr, i32 2
; SCALAR-NEXT: store i32 %Elt2, i32* %Ptr23, align 4
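
; test6: an all-ones gather and an all-ones scatter through the same vector
; of pointers; both masks are materialized with kxnorw.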
define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
; KNL_64-LABEL: test6:
; KNL_64: # BB#0:
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: kxnorw %k0, %k0, %k2
; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT: vmovdqa %ymm2, %ymm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test6:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm2
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: kxnorw %k0, %k0, %k2
; KNL_32-NEXT: vpgatherqd (,%zmm2), %ymm1 {%k2}
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm2) {%k1}
; KNL_32-NEXT: vmovdqa %ymm1, %ymm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test6:
; SKX: # BB#0:
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kxnorw %k0, %k0, %k2
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; SKX-NEXT: vmovdqa %ymm2, %ymm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test6:
; SKX_32: # BB#0:
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: kxnorw %k0, %k0, %k2
; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm2 {%k2}
; SKX_32-NEXT: vpscatterdd %ymm0, (,%ymm1) {%k1}
; SKX_32-NEXT: vmovdqa %ymm2, %ymm0
; SKX_32-NEXT: retl
%a = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)

call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
ret <8 x i32>%a
}
define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
;
; KNL_64-LABEL: test7:
; KNL_64: # BB#0:
; KNL_64-NEXT: kmovw %esi, %k1
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kmovw %k1, %k2
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k2}
; KNL_64-NEXT: vmovdqa %ymm1, %ymm2
; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
; KNL_64-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test7:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw %k1, %k2
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k2}
; KNL_32-NEXT: vmovdqa %ymm1, %ymm2
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1}
; KNL_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test7:
; SKX: # BB#0:
; SKX-NEXT: kmovw %esi, %k1
; SKX-NEXT: kmovw %k1, %k2
; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm1 {%k2}
; SKX-NEXT: vmovdqa %ymm1, %ymm2
; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm2 {%k1}
; SKX-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test7:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT: kmovw %k1, %k2
; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm1 {%k2}
; SKX_32-NEXT: vmovdqa %ymm1, %ymm2
; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm2 {%k1}
; SKX_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <8 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <8 x i32*> %broadcast.splatinsert, <8 x i32*> undef, <8 x i32> zeroinitializer

%gep.random = getelementptr i32, <8 x i32*> %broadcast.splat, <8 x i32> %ind
%imask = bitcast i8 %mask to <8 x i1>
%gt1 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef)
%gt2 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1)
%res = add <8 x i32> %gt1, %gt2
ret <8 x i32> %res
}
; No uniform base in this case; the <8 x i64> index vectors contain full
; addresses, so each gather call will be split into two.
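; (On the 32-bit targets pointers are only 32 bits wide, so the whole v16
; gather fits in a single vpgatherdd and no splitting is needed.)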
define <16 x i32> @test8(<16 x i32*> %ptr.random, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test8:
; KNL_64: # BB#0:
; KNL_64-NEXT: kmovw %edi, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: kmovw %k2, %k3
; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k3}
; KNL_64-NEXT: kmovw %k1, %k3
; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k3}
; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm4
; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1}
; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
; KNL_64-NEXT: vpaddd %zmm0, %zmm4, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test8:
; KNL_32: # BB#0:
; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT: kmovw %k1, %k2
; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k2}
; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2
; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
; KNL_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test8:
; SKX: # BB#0:
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: kmovw %k2, %k3
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k3}
; SKX-NEXT: kmovw %k1, %k3
; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k3}
; SKX-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm4
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1}
; SKX-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
; SKX-NEXT: vpaddd %zmm0, %zmm4, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test8:
; SKX_32: # BB#0:
; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT: kmovw %k1, %k2
; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k2}
; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm2
; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
; SKX_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; SKX_32-NEXT: retl
%imask = bitcast i16 %mask to <16 x i1>
%gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
%gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
%res = add <16 x i32> %gt1, %gt2
ret <16 x i32> %res
}
|
2015-09-02 16:39:13 +08:00
|
|
|
|
|
|
|
%struct.RT = type { i8, [10 x [20 x i32]], i8 }
|
|
|
|
%struct.ST = type { i32, double, %struct.RT }

; Masked gather for aggregate types
; Test9 and Test10 should give the same result (scalar and vector indices in GEP)
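; With the layouts above, sizeof(%struct.ST) is 824 bytes on the 64-bit
; targets and 820 bytes on i386, and one [20 x i32] row of %struct.RT is 80
; bytes; the constant indices (field 2, field 1, element 13) fold to a fixed
; byte offset (68 on i386). These are exactly the 824/820/80/68 constants
; that appear in the address arithmetic in the checks below.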
define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
; KNL_64-LABEL: test9:
; KNL_64: # BB#0: # %entry
; KNL_64-NEXT: vpbroadcastq %rdi, %zmm2
; KNL_64-NEXT: vpbroadcastq {{.*#+}} zmm3 = [824,824,824,824,824,824,824,824]
; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm4
; KNL_64-NEXT: vpsrlq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm1
; KNL_64-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1
; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; KNL_64-NEXT: vpaddq %zmm0, %zmm4, %zmm0
; KNL_64-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test9:
; KNL_32: # BB#0: # %entry
; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm2
; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm3 = [80,80,80,80,80,80,80,80]
; KNL_32-NEXT: vpmulld %ymm3, %ymm1, %ymm1
; KNL_32-NEXT: vpmovqd %zmm0, %ymm0
; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm3 = [820,820,820,820,820,820,820,820]
; KNL_32-NEXT: vpmulld %ymm3, %ymm0, %ymm0
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [68,68,68,68,68,68,68,68]
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; KNL_32-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_32-NEXT: retl
;
; SKX_SMALL-LABEL: test9:
; SKX_SMALL: # BB#0: # %entry
; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2
; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1
; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
; SKX_SMALL-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX_SMALL-NEXT: retq
;
; SKX_LARGE-LABEL: test9:
; SKX_LARGE: # BB#0: # %entry
; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2
; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0
; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; SKX_LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
; SKX_LARGE-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: test9:
; SKX_32: # BB#0: # %entry
; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm1, %ymm1
; SKX_32-NEXT: vpmovqd %zmm0, %ymm0
; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
; SKX_32-NEXT: vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0
; SKX_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm0 {%k1}
; SKX_32-NEXT: retl
entry:
%broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
%broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer

%arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %ind1, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <8 x i32><i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <8 x i32> %ind5, <8 x i64> <i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13>
%res = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
ret <8 x i32> %res
}
define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
; KNL_64-LABEL: test10:
; KNL_64: # BB#0: # %entry
; KNL_64-NEXT: vpbroadcastq %rdi, %zmm2
; KNL_64-NEXT: vpbroadcastq {{.*#+}} zmm3 = [824,824,824,824,824,824,824,824]
; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm4
; KNL_64-NEXT: vpsrlq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm1
; KNL_64-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1
; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; KNL_64-NEXT: vpaddq %zmm0, %zmm4, %zmm0
; KNL_64-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test10:
; KNL_32: # BB#0: # %entry
; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm2
; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm3 = [80,80,80,80,80,80,80,80]
; KNL_32-NEXT: vpmulld %ymm3, %ymm1, %ymm1
; KNL_32-NEXT: vpmovqd %zmm0, %ymm0
; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm3 = [820,820,820,820,820,820,820,820]
; KNL_32-NEXT: vpmulld %ymm3, %ymm0, %ymm0
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [68,68,68,68,68,68,68,68]
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; KNL_32-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_32-NEXT: retl
;
; SKX_SMALL-LABEL: test10:
; SKX_SMALL: # BB#0: # %entry
; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2
; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1
; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
; SKX_SMALL-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX_SMALL-NEXT: retq
;
; SKX_LARGE-LABEL: test10:
; SKX_LARGE: # BB#0: # %entry
; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2
; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0
; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; SKX_LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
; SKX_LARGE-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: test10:
; SKX_32: # BB#0: # %entry
; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm1, %ymm1
; SKX_32-NEXT: vpmovqd %zmm0, %ymm0
; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
; SKX_32-NEXT: vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0
; SKX_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm0 {%k1}
; SKX_32-NEXT: retl
entry:
%broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
%broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer

%arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %i1, i32 2, i32 1, <8 x i32> %ind5, i64 13
%res = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
ret <8 x i32> %res
}

; Splat index in GEP, requires broadcast
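; (The scalar %ind is broadcast into a vector register, vpbroadcastd %esi on
; the 64-bit targets and vbroadcastss from the stack on i386, so a single
; vgatherdps can consume it as the per-lane index.)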
define <16 x float> @test11(float* %base, i32 %ind) {
; KNL_64-LABEL: test11:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpbroadcastd %esi, %zmm1
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test11:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT: retl
;
; SKX-LABEL: test11:
; SKX: # BB#0:
; SKX-NEXT: vpbroadcastd %esi, %zmm1
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test11:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

%gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind

%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
ret <16 x float>%res
}

; We are checking the uniform base here. It is taken directly from the input to vgatherdps.
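; (%base stays scalar and becomes the base-register operand, %rdi or %eax, in
; the gather addressing below; only the sign-extended indices are vectors.)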
define <16 x float> @test12(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test12:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: kxnorw %k0, %k0, %k2
; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test12:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: kxnorw %k0, %k0, %k2
; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test12:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kxnorw %k0, %k0, %k2
; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test12:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: kxnorw %k0, %k0, %k2
; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX_32-NEXT: retl
%sext_ind = sext <16 x i32> %ind to <16 x i64>
%gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind

%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
ret <16 x float>%res
}

; The same as the previous, but the mask is undefined
define <16 x float> @test13(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test13:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: kxnorw %k0, %k0, %k2
; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test13:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: kxnorw %k0, %k0, %k2
; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test13:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kxnorw %k0, %k0, %k2
; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2}
; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test13:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: kxnorw %k0, %k0, %k2
; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2}
; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX_32-NEXT: retl
%sext_ind = sext <16 x i32> %ind to <16 x i64>
%gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind

%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
ret <16 x float>%res
}

; The base pointer is not splat, so a uniform base can't be found
; (a contrasting splat-base sketch follows this test).
define <16 x float> @test14(float* %base, i32 %ind, <16 x float*> %vec) {
; KNL_64-LABEL: test14:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
; KNL_64-NEXT: vpbroadcastq %xmm0, %zmm0
; KNL_64-NEXT: vmovd %esi, %xmm1
; KNL_64-NEXT: vpbroadcastd %xmm1, %ymm1
; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm1
; KNL_64-NEXT: vpsllq $2, %zmm1, %zmm1
; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: vgatherqps (,%zmm0), %ymm1 {%k1}
; KNL_64-NEXT: vinsertf64x4 $1, %ymm1, %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test14:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; KNL_32-NEXT: vpbroadcastd %xmm0, %zmm0
; KNL_32-NEXT: vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1
; KNL_32-NEXT: vpaddd %zmm1, %zmm0, %zmm1
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vgatherdps (,%zmm1), %zmm0 {%k1}
; KNL_32-NEXT: retl
;
; SKX-LABEL: test14:
; SKX: # BB#0:
; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
; SKX-NEXT: vpbroadcastq %xmm0, %zmm0
; SKX-NEXT: vpbroadcastd %esi, %ymm1
; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
; SKX-NEXT: vpsllq $2, %zmm1, %zmm1
; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vgatherqps (,%zmm0), %ymm1 {%k1}
; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm1, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test14:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; SKX_32-NEXT: vpbroadcastd %xmm0, %zmm0
; SKX_32-NEXT: vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1
; SKX_32-NEXT: vpaddd %zmm1, %zmm0, %zmm1
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vgatherdps (,%zmm1), %zmm0 {%k1}
; SKX_32-NEXT: retl
  %broadcast.splatinsert = insertelement <16 x float*> %vec, float* %base, i32 1
  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind
  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float> %res
}
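
; For contrast, a minimal splat-base sketch (illustrative only, not a
; checked test; the function name is hypothetical): when every lane holds
; the same base pointer, the backend can fold it into the base register of
; the gather addressing mode, as the (%rdi,%zmm..,4) forms elsewhere in
; this file show.
define <16 x float> @splat_base_sketch(float* %base, <16 x i32> %ind) {
  %head = insertelement <16 x float*> undef, float* %base, i32 0
  %splat = shufflevector <16 x float*> %head, <16 x float*> undef, <16 x i32> zeroinitializer
  %gep = getelementptr float, <16 x float*> %splat, <16 x i32> %ind
  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float> %res
}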

declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>)

; Gather smaller than existing instruction
define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
; KNL_64-LABEL: test15:
; KNL_64: # BB#0:
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm0, %ymm2
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1}
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test15:
; KNL_32: # BB#0:
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %xmm0, %ymm2
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1}
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test15:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT: vpmovsxdq %xmm0, %ymm1
; SKX-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm0 {%k1}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test15:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpmovsxdq %xmm0, %ymm1
; SKX_32-NEXT: vgatherqps (%eax,%ymm1,4), %xmm0 {%k1}
; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
  %sext_ind = sext <4 x i32> %ind to <4 x i64>
  %gep.random = getelementptr float, float* %base, <4 x i64> %sext_ind
  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef)
  ret <4 x float> %res
}

; Gather smaller than existing instruction
define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x double> %src0) {
; KNL_64-LABEL: test16:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vmovdqa %ymm1, %ymm1
; KNL_64-NEXT: vpmovsxdq %xmm0, %ymm0
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
; KNL_64-NEXT: vmovapd %ymm2, %ymm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test16:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vmovdqa %ymm1, %ymm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %xmm0, %ymm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovapd %ymm2, %ymm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test16:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT: vpmovsxdq %xmm0, %ymm0
; SKX-NEXT: vgatherqpd (%rdi,%ymm0,8), %ymm2 {%k1}
; SKX-NEXT: vmovapd %ymm2, %ymm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test16:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpmovsxdq %xmm0, %ymm0
; SKX_32-NEXT: vgatherqpd (%eax,%ymm0,8), %ymm2 {%k1}
; SKX_32-NEXT: vmovapd %ymm2, %ymm0
; SKX_32-NEXT: retl
  %sext_ind = sext <4 x i32> %ind to <4 x i64>
  %gep.random = getelementptr double, double* %base, <4 x i64> %sext_ind
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0)
  ret <4 x double> %res
}

define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x double> %src0) {
; KNL_64-LABEL: test17:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
; KNL_64-NEXT: vmovapd %xmm2, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test17:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovapd %xmm2, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test17:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: vgatherqpd (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT: vmovapd %xmm2, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test17:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: vgatherqpd (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT: vmovapd %xmm2, %xmm0
; SKX_32-NEXT: retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr double, double* %base, <2 x i64> %sext_ind
  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0)
  ret <2 x double> %res
}

declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64>, <2 x i64*>, i32, <2 x i1>)
declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32>, <2 x i32*>, i32, <2 x i1>)
declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float>, <2 x float*>, i32, <2 x i1>)

define void @test18(<4 x i32> %a1, <4 x i32*> %ptr, <4 x i1> %mask) {
; KNL_64-LABEL: test18:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_64-NEXT: vmovdqa %xmm2, %xmm2
; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test18:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_32-NEXT: vmovdqa %xmm2, %xmm2
; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2
; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test18:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm2, %xmm2
; SKX-NEXT: vptestmd %xmm2, %xmm2, %k1
; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test18:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2
; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1
; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1}
; SKX_32-NEXT: retl
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
  ret void
}

define void @test19(<4 x double> %a1, double* %ptr, <4 x i1> %mask, <4 x i64> %ind) {
; KNL_64-LABEL: test19:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
; KNL_64-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vmovdqa %ymm1, %ymm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vscatterqpd %zmm0, (%rdi,%zmm2,8) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test19:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
; KNL_32-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vmovdqa %ymm1, %ymm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm2,8) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test19:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT: vscatterqpd %ymm0, (%rdi,%ymm2,8) {%k1}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test19:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vscatterqpd %ymm0, (%eax,%ymm2,8) {%k1}
; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
  %gep = getelementptr double, double* %ptr, <4 x i64> %ind
  call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask)
  ret void
}

; Data type requires widening (see the note after this test)
define void @test20(<2 x float> %a1, <2 x float*> %ptr, <2 x i1> %mask) {
; KNL_64-LABEL: test20:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero
; KNL_64-NEXT: vmovaps %xmm2, %xmm2
; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test20:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_32-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero
; KNL_32-NEXT: vmovaps %xmm2, %xmm2
; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2
; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_32-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test20:
; SKX: # BB#0:
; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vptestmq %xmm2, %xmm2, %k1
; SKX-NEXT: vscatterqps %xmm0, (,%ymm1) {%k1}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test20:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k1
; SKX_32-NEXT: vscatterdps %xmm0, (,%xmm1) {%k1}
; SKX_32-NEXT: retl
  call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask)
  ret void
}
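
; A note on the widening above (an informal reading of the checks, not an
; extra test): the <2 x float> value is legalized as the low half of a wider
; vector and the <2 x i1> mask gets false upper lanes, which is what the
; "vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero" line in the KNL checks
; builds, so the scatter still writes only the two real lanes.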

; Data type requires promotion (see the note after this test)
define void @test21(<2 x i32> %a1, <2 x i32*> %ptr, <2 x i1> %mask) {
; KNL_64-LABEL: test21:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; KNL_64-NEXT: vmovdqa %xmm2, %xmm2
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test21:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; KNL_32-NEXT: vmovdqa %xmm2, %xmm2
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test21:
; SKX: # BB#0:
; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vptestmq %xmm2, %xmm2, %k1
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test21:
; SKX_32: # BB#0:
; SKX_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k1
; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
  ret void
}
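
; A note on the promotion above (an informal reading of the checks, not an
; extra test): <2 x i32> is promoted so each element lives in the low dword
; of a 64-bit lane, and the "vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]" in the
; checks compacts those low dwords back together before the 32-bit scatter.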

; The result type requires widening
declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>)

define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x float> %src0) {
; KNL_64-LABEL: test22:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
; KNL_64-NEXT: vmovaps %xmm1, %xmm1
; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
; KNL_64-NEXT: vmovaps %xmm2, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test22:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
; KNL_32-NEXT: vmovaps %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
; KNL_32-NEXT: vmovaps %xmm2, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test22:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: vgatherqps (%rdi,%xmm0,4), %xmm2 {%k1}
; SKX-NEXT: vmovaps %xmm2, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test22:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: vgatherqps (%eax,%xmm0,4), %xmm2 {%k1}
; SKX_32-NEXT: vmovaps %xmm2, %xmm0
; SKX_32-NEXT: retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
  ret <2 x float> %res
}

define <2 x float> @test22a(float* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x float> %src0) {
; KNL_64-LABEL: test22a:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
; KNL_64-NEXT: vmovaps %xmm1, %xmm1
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
; KNL_64-NEXT: vmovaps %xmm2, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test22a:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
; KNL_32-NEXT: vmovaps %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
; KNL_32-NEXT: vmovaps %xmm2, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test22a:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT: vgatherqps (%rdi,%xmm0,4), %xmm2 {%k1}
; SKX-NEXT: vmovaps %xmm2, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test22a:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vgatherqps (%eax,%xmm0,4), %xmm2 {%k1}
; SKX_32-NEXT: vmovaps %xmm2, %xmm0
; SKX_32-NEXT: retl
  %gep.random = getelementptr float, float* %base, <2 x i64> %ind
  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
  ret <2 x float> %res
}

declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)

define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) {
; KNL_64-LABEL: test23:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test23:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test23:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT: vpmovsxdq %xmm1, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test23:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0
; SKX_32-NEXT: retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
  %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
  ret <2 x i32> %res
}

define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test24:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm1
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm1,8), %zmm0 {%k1}
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test24:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm1
; KNL_32-NEXT: vmovdqa {{.*#+}} xmm0 = [1,0,1,0]
; KNL_32-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm1,8), %zmm0 {%k1}
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test24:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT: vpmovsxdq %xmm1, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test24:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0
; SKX_32-NEXT: retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
  %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
  ret <2 x i32> %res
}

define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %src0) {
; KNL_64-LABEL: test25:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
; KNL_64-NEXT: vmovdqa %xmm1, %xmm1
; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test25:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
; KNL_32-NEXT: vmovdqa %xmm1, %xmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test25:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT: vmovdqa %xmm2, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test25:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT: vmovdqa %xmm2, %xmm0
; SKX_32-NEXT: retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
  %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0)
  ret <2 x i64> %res
}

define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
; KNL_64-LABEL: test26:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
; KNL_64-NEXT: vmovdqa %xmm1, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test26:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,1,0]
; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test26:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
; SKX-NEXT: vmovdqa %xmm1, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test26:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1}
; SKX_32-NEXT: vmovdqa %xmm1, %xmm0
; SKX_32-NEXT: retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
  %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %src0)
  ret <2 x i64> %res
}

; Result type requires widening; all-ones mask
define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test27:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm1
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test27:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm1
; KNL_32-NEXT: movb $3, %cl
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test27:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX-NEXT: vpsraq $32, %xmm0, %xmm1
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm0 {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test27:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm1
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vgatherqps (%eax,%xmm1,4), %xmm0 {%k1}
; SKX_32-NEXT: retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
  ret <2 x float> %res
}

; Data type requires promotion, mask is all-ones
define void @test28(<2 x i32> %a1, <2 x i32*> %ptr) {
; KNL_64-LABEL: test28:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test28:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,1,0]
; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test28:
; SKX: # BB#0:
; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; SKX-NEXT: movb $3, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test28:
; SKX_32: # BB#0:
; SKX_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; SKX_32-NEXT: movb $3, %al
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> <i1 true, i1 true>)
  ret void
}

; SCALAR-LABEL: test29
; SCALAR: extractelement <16 x float*>
; SCALAR-NEXT: load float
; SCALAR-NEXT: insertelement <16 x float>
; SCALAR-NEXT: extractelement <16 x float*>
; SCALAR-NEXT: load float

define <16 x float> @test29(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test29:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT: kxorw %k0, %k0, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
; KNL_64-NEXT: movb $44, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test29:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kxorw %k0, %k0, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
; KNL_32-NEXT: movb $44, %cl
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test29:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX-NEXT: kxorw %k0, %k0, %k1
; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
; SKX-NEXT: movb $44, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test29:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm1
; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vpmovsxdq %ymm0, %zmm0
; SKX_32-NEXT: kxorw %k0, %k0, %k1
; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
; SKX_32-NEXT: movb $44, %cl
; SKX_32-NEXT: kmovw %ecx, %k1
; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; SKX_32-NEXT: retl
  %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x float> undef)
  ret <16 x float> %res
}

; Check non-power-of-2 case. It should be scalarized
; (see the sketch after this declaration).
declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>)
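
; A minimal sketch of what the scalarized form looks like (an illustration
; of the expansion, not checked output; the value names are hypothetical):
;   %p0 = extractelement <3 x i32*> %ptrs, i32 0
;   %m0 = extractelement <3 x i1> %mask, i32 0
;   br i1 %m0, label %cond.load, label %else
; cond.load:
;   %v0 = load i32, i32* %p0, align 4
;   %r0 = insertelement <3 x i32> undef, i32 %v0, i32 0
;   br label %else
; ...and likewise for lanes 1 and 2, with a final select against %src0.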

define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
; KNL_64-LABEL: test30:
; KNL_64: # BB#0:
; KNL_64-NEXT: kmovw %edx, %k0
; KNL_64-NEXT: kmovw %esi, %k2
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; KNL_64-NEXT: testb $1, %dil
; KNL_64-NEXT: # implicit-def: %XMM0
; KNL_64-NEXT: je .LBB30_2
; KNL_64-NEXT: # BB#1: # %cond.load
; KNL_64-NEXT: vmovq %xmm1, %rax
; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL_64-NEXT: .LBB30_2: # %else
; KNL_64-NEXT: kmovw %edi, %k1
; KNL_64-NEXT: kshiftlw $15, %k2, %k2
; KNL_64-NEXT: kshiftrw $15, %k2, %k2
; KNL_64-NEXT: kmovw %k2, %eax
; KNL_64-NEXT: testb $1, %al
; KNL_64-NEXT: je .LBB30_4
; KNL_64-NEXT: # BB#3: # %cond.load1
; KNL_64-NEXT: vpextrq $1, %xmm1, %rax
; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
; KNL_64-NEXT: .LBB30_4: # %else2
; KNL_64-NEXT: kshiftlw $15, %k0, %k0
; KNL_64-NEXT: kshiftrw $15, %k0, %k0
; KNL_64-NEXT: kmovw %k0, %eax
; KNL_64-NEXT: testb $1, %al
; KNL_64-NEXT: je .LBB30_6
; KNL_64-NEXT: # BB#5: # %cond.load4
; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1
; KNL_64-NEXT: vmovq %xmm1, %rax
; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
; KNL_64-NEXT: .LBB30_6: # %else5
; KNL_64-NEXT: kmovw %k2, %eax
; KNL_64-NEXT: kshiftlw $15, %k1, %k1
; KNL_64-NEXT: kshiftrw $15, %k1, %k1
; KNL_64-NEXT: kmovw %k1, %ecx
; KNL_64-NEXT: vmovd %ecx, %xmm1
; KNL_64-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL_64-NEXT: kmovw %k0, %eax
; KNL_64-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test30:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: kmovw %eax, %k0
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: kmovw %eax, %k2
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: # implicit-def: %XMM0
; KNL_32-NEXT: je .LBB30_2
; KNL_32-NEXT: # BB#1: # %cond.load
; KNL_32-NEXT: vmovd %xmm1, %ecx
; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL_32-NEXT: .LBB30_2: # %else
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kshiftlw $15, %k2, %k2
; KNL_32-NEXT: kshiftrw $15, %k2, %k2
; KNL_32-NEXT: kmovw %k2, %eax
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB30_4
; KNL_32-NEXT: # BB#3: # %cond.load1
; KNL_32-NEXT: vpextrd $1, %xmm1, %eax
; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: .LBB30_4: # %else2
; KNL_32-NEXT: kshiftlw $15, %k0, %k0
; KNL_32-NEXT: kshiftrw $15, %k0, %k0
; KNL_32-NEXT: kmovw %k0, %eax
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB30_6
; KNL_32-NEXT: # BB#5: # %cond.load4
; KNL_32-NEXT: vpextrd $2, %xmm1, %eax
; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: .LBB30_6: # %else5
; KNL_32-NEXT: kmovw %k2, %eax
; KNL_32-NEXT: kshiftlw $15, %k1, %k1
; KNL_32-NEXT: kshiftrw $15, %k1, %k1
; KNL_32-NEXT: kmovw %k1, %ecx
; KNL_32-NEXT: vmovd %ecx, %xmm1
; KNL_32-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL_32-NEXT: kmovw %k0, %eax
; KNL_32-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test30:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm2, %xmm2
; SKX-NEXT: vptestmd %xmm2, %xmm2, %k1
; SKX-NEXT: kshiftlw $15, %k1, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; SKX-NEXT: testb $1, %al
; SKX-NEXT: # implicit-def: %XMM0
; SKX-NEXT: je .LBB30_2
; SKX-NEXT: # BB#1: # %cond.load
; SKX-NEXT: vmovq %xmm1, %rax
; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SKX-NEXT: .LBB30_2: # %else
; SKX-NEXT: kshiftlw $14, %k1, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: je .LBB30_4
; SKX-NEXT: # BB#3: # %cond.load1
; SKX-NEXT: vpextrq $1, %xmm1, %rax
; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
; SKX-NEXT: .LBB30_4: # %else2
; SKX-NEXT: kshiftlw $13, %k1, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: je .LBB30_6
; SKX-NEXT: # BB#5: # %cond.load4
; SKX-NEXT: vextracti128 $1, %ymm1, %xmm1
; SKX-NEXT: vmovq %xmm1, %rax
; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
; SKX-NEXT: .LBB30_6: # %else5
; SKX-NEXT: vmovdqa32 %xmm0, %xmm3 {%k1}
; SKX-NEXT: vmovdqa %xmm3, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test30:
; SKX_32: # BB#0:
; SKX_32-NEXT: subl $12, %esp
; SKX_32-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2
|
|
|
|
; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1
|
|
|
|
; SKX_32-NEXT: kshiftlw $15, %k1, %k0
|
|
|
|
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
|
|
|
|
; SKX_32-NEXT: kmovw %k0, %eax
|
|
|
|
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
|
|
|
|
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
|
|
|
|
; SKX_32-NEXT: testb $1, %al
|
|
|
|
; SKX_32-NEXT: # implicit-def: %XMM1
|
|
|
|
; SKX_32-NEXT: je .LBB30_2
|
|
|
|
; SKX_32-NEXT: # BB#1: # %cond.load
|
|
|
|
; SKX_32-NEXT: vmovd %xmm2, %eax
|
|
|
|
; SKX_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; SKX_32-NEXT: .LBB30_2: # %else
|
|
|
|
; SKX_32-NEXT: kshiftlw $14, %k1, %k0
|
|
|
|
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
|
|
|
|
; SKX_32-NEXT: kmovw %k0, %eax
|
|
|
|
; SKX_32-NEXT: testb $1, %al
|
|
|
|
; SKX_32-NEXT: je .LBB30_4
|
|
|
|
; SKX_32-NEXT: # BB#3: # %cond.load1
|
|
|
|
; SKX_32-NEXT: vpextrd $1, %xmm2, %eax
|
|
|
|
; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
|
|
|
|
; SKX_32-NEXT: .LBB30_4: # %else2
|
|
|
|
; SKX_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
|
|
|
|
; SKX_32-NEXT: kshiftlw $13, %k1, %k0
|
|
|
|
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
|
|
|
|
; SKX_32-NEXT: kmovw %k0, %eax
|
|
|
|
; SKX_32-NEXT: testb $1, %al
|
|
|
|
; SKX_32-NEXT: je .LBB30_6
|
|
|
|
; SKX_32-NEXT: # BB#5: # %cond.load4
|
|
|
|
; SKX_32-NEXT: vpextrd $2, %xmm2, %eax
|
|
|
|
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
|
|
|
|
; SKX_32-NEXT: .LBB30_6: # %else5
|
|
|
|
; SKX_32-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
|
|
|
|
; SKX_32-NEXT: addl $12, %esp
|
|
|
|
; SKX_32-NEXT: retl
|
2015-12-15 16:40:41 +08:00
|
|
|
|
2015-10-25 23:37:55 +08:00
|
|
|
%sext_ind = sext <3 x i32> %ind to <3 x i64>
|
|
|
|
%gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind
|
2017-05-03 20:28:54 +08:00
|
|
|
%res = call <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0)
|
2015-10-25 23:37:55 +08:00
|
|
|
ret <3 x i32>%res
|
|
|
|
}
declare <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
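; test31 gathers pointer-sized elements under an all-true mask. With 64-bit
; pointers the <16 x float*> result spans two zmm registers, so two vpgatherqq
; instructions are emitted; with 32-bit pointers a single vpgatherdd covers
; all 16 pointers.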
define <16 x float*> @test31(<16 x float**> %ptrs) {
; KNL_64-LABEL: test31:
; KNL_64: # BB#0:
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: kxnorw %k0, %k0, %k2
; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm2 {%k2}
; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm3 {%k1}
; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_64-NEXT: vmovdqa64 %zmm3, %zmm1
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test31:
; KNL_32: # BB#0:
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1}
; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test31:
; SKX: # BB#0:
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kxnorw %k0, %k0, %k2
; SKX-NEXT: vpgatherqq (,%zmm0), %zmm2 {%k2}
; SKX-NEXT: vpgatherqq (,%zmm1), %zmm3 {%k1}
; SKX-NEXT: vmovdqa64 %zmm2, %zmm0
; SKX-NEXT: vmovdqa64 %zmm3, %zmm1
; SKX-NEXT: retq
;
; SKX_32-LABEL: test31:
; SKX_32: # BB#0:
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1}
; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX_32-NEXT: retl
  %res = call <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef)
  ret <16 x float*>%res
}
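
; In the 16-element tests below, the <16 x i1> mask arrives in an xmm
; register: it is widened with vpmovsxbd, shifted into the sign bit with
; vpslld $31, and turned into a k-register with vptestmd.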
define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) {
; KNL_64-LABEL: test_gather_16i32:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: vextracti64x4 $1, %zmm3, %ymm2
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1}
; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_gather_16i32:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_gather_16i32:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
; SKX-NEXT: vextracti64x4 $1, %zmm3, %ymm2
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1}
; SKX-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_gather_16i32:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0
; SKX_32-NEXT: retl
  %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0)
  ret <16 x i32> %res
}
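
; A <16 x i64> result occupies two zmm registers, so the mask is split with
; kshiftrw $8 and the gather is issued as two half-width operations.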
define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
; KNL_64-LABEL: test_gather_16i64:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1}
; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2}
; KNL_64-NEXT: vmovdqa64 %zmm3, %zmm0
; KNL_64-NEXT: vmovdqa64 %zmm4, %zmm1
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_gather_16i64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1
; KNL_32-NEXT: kshiftrw $8, %k1, %k2
; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1}
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2}
; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_gather_16i64:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1}
; SKX-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2}
; SKX-NEXT: vmovdqa64 %zmm3, %zmm0
; SKX-NEXT: vmovdqa64 %zmm4, %zmm1
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_gather_16i64:
; SKX_32: # BB#0:
; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1
; SKX_32-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1}
; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2}
; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: retl
  %res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
  ret <16 x i64> %res
}

declare <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)

define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) {
; KNL_64-LABEL: test_gather_16f32:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: vextractf64x4 $1, %zmm3, %ymm2
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vgatherqps (,%zmm1), %ymm2 {%k2}
; KNL_64-NEXT: vgatherqps (,%zmm0), %ymm3 {%k1}
; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm3, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_gather_16f32:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1}
; KNL_32-NEXT: vmovaps %zmm2, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_gather_16f32:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
; SKX-NEXT: vextractf64x4 $1, %zmm3, %ymm2
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vgatherqps (,%zmm1), %ymm2 {%k2}
; SKX-NEXT: vgatherqps (,%zmm0), %ymm3 {%k1}
; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm3, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_gather_16f32:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; SKX_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1}
; SKX_32-NEXT: vmovaps %zmm2, %zmm0
; SKX_32-NEXT: retl
  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
  ret <16 x float> %res
}

define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) {
; KNL_64-LABEL: test_gather_16f64:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1}
; KNL_64-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2}
; KNL_64-NEXT: vmovapd %zmm3, %zmm0
; KNL_64-NEXT: vmovapd %zmm4, %zmm1
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_gather_16f64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1
; KNL_32-NEXT: kshiftrw $8, %k1, %k2
; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1}
; KNL_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2}
; KNL_32-NEXT: vmovapd %zmm2, %zmm0
; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_gather_16f64:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1}
; SKX-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2}
; SKX-NEXT: vmovapd %zmm3, %zmm0
; SKX-NEXT: vmovapd %zmm4, %zmm1
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_gather_16f64:
; SKX_32: # BB#0:
; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1
; SKX_32-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1}
; SKX_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2}
; SKX_32-NEXT: vmovapd %zmm2, %zmm0
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: retl
  %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
  ret <16 x double> %res
}

declare <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
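
; The scatter tests mirror the gathers: the mask is materialized the same way,
; and wide cases are split in the same way (for example two vpscatterqd halves
; when the pointers occupy two zmm registers).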
define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) {
; KNL_64-LABEL: test_scatter_16i32:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vpscatterqd %ymm3, (,%zmm0) {%k1}
; KNL_64-NEXT: vextracti64x4 $1, %zmm3, %ymm0
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k2}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_scatter_16i32:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_scatter_16i32:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vpscatterqd %ymm3, (,%zmm0) {%k1}
; SKX-NEXT: vextracti64x4 $1, %zmm3, %ymm0
; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k2}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_scatter_16i32:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; SKX_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1}
; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask)
  ret void
}

define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
; KNL_64-LABEL: test_scatter_16i64:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1}
; KNL_64-NEXT: vpscatterqq %zmm4, (,%zmm1) {%k2}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_scatter_16i64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1
; KNL_32-NEXT: kshiftrw $8, %k1, %k2
; KNL_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1}
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2}
; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_scatter_16i64:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1}
; SKX-NEXT: vpscatterqq %zmm4, (,%zmm1) {%k2}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_scatter_16i64:
; SKX_32: # BB#0:
; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1
; SKX_32-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1}
; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2}
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
  call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask)
  ret void
}

declare void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask)

define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) {
; KNL_64-LABEL: test_scatter_16f32:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vscatterqps %ymm3, (,%zmm0) {%k1}
; KNL_64-NEXT: vextractf64x4 $1, %zmm3, %ymm0
; KNL_64-NEXT: vscatterqps %ymm0, (,%zmm1) {%k2}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_scatter_16f32:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_scatter_16f32:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vscatterqps %ymm3, (,%zmm0) {%k1}
; SKX-NEXT: vextractf64x4 $1, %zmm3, %ymm0
; SKX-NEXT: vscatterqps %ymm0, (,%zmm1) {%k2}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_scatter_16f32:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; SKX_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1}
; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
  call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask)
  ret void
}

declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask)

define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) {
; KNL_64-LABEL: test_scatter_16f64:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1}
; KNL_64-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_scatter_16f64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1
; KNL_32-NEXT: kshiftrw $8, %k1, %k2
; KNL_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1}
; KNL_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2}
; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_scatter_16f64:
; SKX: # BB#0:
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1}
; SKX-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_scatter_16f64:
; SKX_32: # BB#0:
; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1
; SKX_32-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1}
; SKX_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2}
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
  call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask)
  ret void
}

declare void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask)
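
; PR28312: the three identical gathers below should be combined into a single
; vpgatherqq whose result feeds both additions.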
define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2, <4 x i64> %d) {
; KNL_64-LABEL: test_pr28312:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vmovdqa %ymm1, %ymm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1}
; KNL_64-NEXT: vpaddq %ymm1, %ymm1, %ymm0
; KNL_64-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_pr28312:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-32, %esp
; KNL_32-NEXT: subl $32, %esp
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vmovdqa %ymm1, %ymm1
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1}
; KNL_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0
; KNL_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_pr28312:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT: vpgatherqq (,%ymm0), %ymm1 {%k1}
; SKX-NEXT: vpaddq %ymm1, %ymm1, %ymm0
; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_pr28312:
; SKX_32: # BB#0:
; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-32, %esp
; SKX_32-NEXT: subl $32, %esp
; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX_32-NEXT: vpgatherdq (,%xmm0), %ymm1 {%k1}
; SKX_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0
; SKX_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: retl
  %g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
  %g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
  %g3 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
  %a = add <4 x i64> %g1, %g2
  %b = add <4 x i64> %a, %g3
  ret <4 x i64> %b
}

declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
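
; A gather from a constant global folds the base address into the memory
; operand (glob_array(,%zmm0,4)) in the small code model; the large code model
; has to materialize the address first with movabsq.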
define <8 x i32> @test_global_array(<8 x i64> %indxs) {
; KNL_64-LABEL: test_global_array:
; KNL_64: # BB#0:
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; KNL_64-NEXT: vmovdqa %ymm1, %ymm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_global_array:
; KNL_32: # BB#0:
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; KNL_32-NEXT: vmovdqa %ymm1, %ymm0
; KNL_32-NEXT: retl
;
; SKX_SMALL-LABEL: test_global_array:
; SKX_SMALL: # BB#0:
; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
; SKX_SMALL-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; SKX_SMALL-NEXT: vmovdqa %ymm1, %ymm0
; SKX_SMALL-NEXT: retq
;
; SKX_LARGE-LABEL: test_global_array:
; SKX_LARGE: # BB#0:
; SKX_LARGE-NEXT: movabsq $glob_array, %rax
; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
; SKX_LARGE-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
; SKX_LARGE-NEXT: vmovdqa %ymm1, %ymm0
; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: test_global_array:
; SKX_32: # BB#0:
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; SKX_32-NEXT: vmovdqa %ymm1, %ymm0
; SKX_32-NEXT: retl
  %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <8 x i64> %indxs
  %g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %g
}
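
; A single-element scatter is lowered to a scalar test of the mask bit and a
; guarded plain store; no scatter instruction is needed.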
define void @v1_scatter(<1 x i32> %a1, <1 x i32*> %ptr, <1 x i1> %mask) {
; KNL_64-LABEL: v1_scatter:
; KNL_64: # BB#0:
; KNL_64-NEXT: testb $1, %dl
; KNL_64-NEXT: jne .LBB42_1
; KNL_64-NEXT: # BB#2: # %else
; KNL_64-NEXT: retq
; KNL_64-NEXT: .LBB42_1: # %cond.store
; KNL_64-NEXT: movl %edi, (%rsi)
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: v1_scatter:
; KNL_32: # BB#0:
; KNL_32-NEXT: testb $1, {{[0-9]+}}(%esp)
; KNL_32-NEXT: jne .LBB42_1
; KNL_32-NEXT: # BB#2: # %else
; KNL_32-NEXT: retl
; KNL_32-NEXT: .LBB42_1: # %cond.store
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; KNL_32-NEXT: movl %ecx, (%eax)
; KNL_32-NEXT: retl
;
; SKX-LABEL: v1_scatter:
; SKX: # BB#0:
; SKX-NEXT: testb $1, %dl
; SKX-NEXT: jne .LBB42_1
; SKX-NEXT: # BB#2: # %else
; SKX-NEXT: retq
; SKX-NEXT: .LBB42_1: # %cond.store
; SKX-NEXT: movl %edi, (%rsi)
; SKX-NEXT: retq
;
; SKX_32-LABEL: v1_scatter:
; SKX_32: # BB#0:
; SKX_32-NEXT: testb $1, {{[0-9]+}}(%esp)
; SKX_32-NEXT: jne .LBB42_1
; SKX_32-NEXT: # BB#2: # %else
; SKX_32-NEXT: retl
; SKX_32-NEXT: .LBB42_1: # %cond.store
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SKX_32-NEXT: movl %ecx, (%eax)
; SKX_32-NEXT: retl
  call void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32> %a1, <1 x i32*> %ptr, i32 4, <1 x i1> %mask)
  ret void
}

declare void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32>, <1 x i32*>, i32, <1 x i1>)
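
; A single-element gather whose mask is known all-true folds to a plain
; scalar load.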
define <1 x i32> @v1_gather(<1 x i32*> %ptr, <1 x i1> %mask, <1 x i32> %src0) {
; KNL_64-LABEL: v1_gather:
; KNL_64: # BB#0:
; KNL_64-NEXT: movl (%rdi), %eax
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: v1_gather:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: movl (%eax), %eax
; KNL_32-NEXT: retl
;
; SKX-LABEL: v1_gather:
; SKX: # BB#0:
; SKX-NEXT: movl (%rdi), %eax
; SKX-NEXT: retq
;
; SKX_32-LABEL: v1_gather:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: movl (%eax), %eax
; SKX_32-NEXT: retl
  %res = call <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*> %ptr, i32 4, <1 x i1> <i1 true>, <1 x i32> %src0)
  ret <1 x i32>%res
}

declare <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*>, i32, <1 x i1>, <1 x i32>)

; Make sure we don't crash when the index element type is larger than i64 and we need to widen the result.
; This experienced a bad interaction when we widened and then tried to split.
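; The i128 indices are passed in GPR pairs; only the low 64 bits feed the
; index vector, as the vmovq %rsi/%rcx sequence below shows.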
define <2 x float> @large_index(float* %base, <2 x i128> %ind, <2 x i1> %mask, <2 x float> %src0) {
; KNL_64-LABEL: large_index:
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; KNL_64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; KNL_64-NEXT: vmovaps %xmm0, %xmm0
; KNL_64-NEXT: vmovq %rcx, %xmm2
; KNL_64-NEXT: vmovq %rsi, %xmm3
; KNL_64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; KNL_64-NEXT: vpslld $31, %ymm0, %ymm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm1 {%k1}
; KNL_64-NEXT: vmovaps %xmm1, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: large_index:
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; KNL_32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; KNL_32-NEXT: vmovaps %xmm0, %xmm0
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2
; KNL_32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
; KNL_32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2
; KNL_32-NEXT: vpslld $31, %ymm0, %ymm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm1 {%k1}
; KNL_32-NEXT: vmovaps %xmm1, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: large_index:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT: vmovq %rcx, %xmm0
; SKX-NEXT: vmovq %rsi, %xmm2
; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; SKX-NEXT: vgatherqps (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT: vmovaps %xmm1, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: large_index:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX_32-NEXT: vptestmq %xmm0, %xmm0, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SKX_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; SKX_32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; SKX_32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; SKX_32-NEXT: vgatherqps (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT: vmovaps %xmm1, %xmm0
; SKX_32-NEXT: retl
  %gep.random = getelementptr float, float* %base, <2 x i128> %ind
  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
  ret <2 x float>%res
}