[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=SKX
|
|
|
|
|
|
|
|
define i1 @allones_v16i8_sign(<16 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v16i8_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; SSE2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v16i8_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v16i8_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovb2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <16 x i8> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <16 x i1> %tmp to i16
|
|
|
|
%tmp2 = icmp eq i16 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v16i8_sign(<16 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v16i8_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testw %ax, %ax
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v16i8_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: testw %ax, %ax
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v16i8_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovb2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <16 x i8> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <16 x i1> %tmp to i16
|
|
|
|
%tmp2 = icmp eq i16 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v32i8_sign(<32 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v32i8_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: cmpl $-1, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v32i8_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm2, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: cmpl $-1, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v32i8_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpl $-1, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v32i8_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovb2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <32 x i8> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <32 x i1> %tmp to i32
|
|
|
|
%tmp2 = icmp eq i32 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v32i8_sign(<32 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v32i8_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v32i8_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm2, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v32i8_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testl %eax, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v32i8_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovb2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <32 x i8> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <32 x i1> %tmp to i32
|
|
|
|
%tmp2 = icmp eq i32 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v64i8_sign(<64 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v64i8_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm3, %edx
|
|
|
|
; SSE2-NEXT: shll $16, %edx
|
|
|
|
; SSE2-NEXT: orl %eax, %edx
|
|
|
|
; SSE2-NEXT: shlq $32, %rdx
|
|
|
|
; SSE2-NEXT: orq %rcx, %rdx
|
|
|
|
; SSE2-NEXT: cmpq $-1, %rdx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v64i8_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm3
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm3, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %edx
|
|
|
|
; AVX1-NEXT: shll $16, %edx
|
|
|
|
; AVX1-NEXT: orl %eax, %edx
|
|
|
|
; AVX1-NEXT: shlq $32, %rdx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
|
|
|
; AVX1-NEXT: cmpq $-1, %rdx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v64i8_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; AVX2-NEXT: shlq $32, %rax
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; AVX2-NEXT: orq %rax, %rcx
|
|
|
|
; AVX2-NEXT: cmpq $-1, %rcx
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v64i8_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovb2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestq %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <64 x i8> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <64 x i1> %tmp to i64
|
|
|
|
%tmp2 = icmp eq i64 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v64i8_sign(<64 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v64i8_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm3, %edx
|
|
|
|
; SSE2-NEXT: shll $16, %edx
|
|
|
|
; SSE2-NEXT: orl %eax, %edx
|
|
|
|
; SSE2-NEXT: shlq $32, %rdx
|
|
|
|
; SSE2-NEXT: orq %rcx, %rdx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v64i8_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm3
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm3, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %edx
|
|
|
|
; AVX1-NEXT: shll $16, %edx
|
|
|
|
; AVX1-NEXT: orl %eax, %edx
|
|
|
|
; AVX1-NEXT: shlq $32, %rdx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v64i8_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; AVX2-NEXT: shlq $32, %rax
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; AVX2-NEXT: orq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v64i8_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovb2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestq %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <64 x i8> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <64 x i1> %tmp to i64
|
|
|
|
%tmp2 = icmp eq i64 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v8i16_sign(<8 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i16_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm1, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v8i16_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $-1, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v8i16_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovw2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <8 x i16> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <8 x i1> %tmp to i8
|
|
|
|
%tmp2 = icmp eq i8 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v8i16_sign(<8 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i16_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm1, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v8i16_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v8i16_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovw2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <8 x i16> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <8 x i1> %tmp to i8
|
|
|
|
%tmp2 = icmp eq i8 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v16i16_sign(<16 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v16i16_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm2
|
|
|
|
; SSE2-NEXT: pxor %xmm3, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; SSE2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v16i16_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX1-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v16i16_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v16i16_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovw2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <16 x i16> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <16 x i1> %tmp to i16
|
|
|
|
%tmp2 = icmp eq i16 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v16i16_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm2
|
|
|
|
; SSE2-NEXT: pxor %xmm3, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: testw %ax, %ax
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v16i16_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm2, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: testw %ax, %ax
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v16i16_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX2-NEXT: testw %ax, %ax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v16i16_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovw2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <16 x i16> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <16 x i1> %tmp to i16
|
|
|
|
%tmp2 = icmp eq i16 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v32i16_sign(<32 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v32i16_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm4
|
|
|
|
; SSE2-NEXT: pxor %xmm5, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm1, %xmm5
|
|
|
|
; SSE2-NEXT: pxor %xmm1, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm5, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: pxor %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm3, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm2, %xmm4
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm4, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: cmpl $-1, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v32i16_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm3, %xmm1
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: cmpl $-1, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v32i16_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
|
|
|
|
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpl $-1, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v32i16_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovw2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <32 x i16> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <32 x i1> %tmp to i32
|
|
|
|
%tmp2 = icmp eq i32 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v32i16_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm4
|
|
|
|
; SSE2-NEXT: pxor %xmm5, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm1, %xmm5
|
|
|
|
; SSE2-NEXT: pxor %xmm1, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm5, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: pxor %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm3, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm2, %xmm4
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm4, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v32i16_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm3, %xmm1
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v32i16_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
|
|
|
|
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testl %eax, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v32i16_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovw2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <32 x i16> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <32 x i1> %tmp to i32
|
|
|
|
%tmp2 = icmp eq i32 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v4i32_sign(<4 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v4i32_sign:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-11 16:20:02 +08:00
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: cmpb $15, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v4i32_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskps %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $15, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v4i32_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb $15, %al
|
|
|
|
; SKX-NEXT: cmpb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <4 x i32> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <4 x i1> %tmp to i4
|
|
|
|
%tmp2 = icmp eq i4 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v4i32_sign(<4 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v4i32_sign:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-11 16:20:02 +08:00
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v4i32_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskps %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v4i32_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: testb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <4 x i32> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <4 x i1> %tmp to i4
|
|
|
|
%tmp2 = icmp eq i4 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v8i32_sign(<8 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i32_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm2
|
|
|
|
; SSE2-NEXT: pxor %xmm3, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-11 16:20:02 +08:00
|
|
|
; AVX-LABEL: allones_v8i32_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $-1, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v8i32_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <8 x i32> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <8 x i1> %tmp to i8
|
|
|
|
%tmp2 = icmp eq i8 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v8i32_sign(<8 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i32_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm2
|
|
|
|
; SSE2-NEXT: pxor %xmm3, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-11 16:20:02 +08:00
|
|
|
; AVX-LABEL: allzeros_v8i32_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v8i32_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <8 x i32> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <8 x i1> %tmp to i8
|
|
|
|
%tmp2 = icmp eq i8 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Collects the sign bit of each of the 16 i32 lanes into an i16 mask
; (icmp slt against zero, then bitcast <16 x i1> to i16) and returns true
; only when every mask bit is set (mask == -1).
define i1 @allones_v16i32_sign(<16 x i32> %arg) {
; SSE2-LABEL: allones_v16i32_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    pxor %xmm5, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    packssdw %xmm5, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
; SSE2-NEXT:    packssdw %xmm2, %xmm4
; SSE2-NEXT:    packsswb %xmm3, %xmm4
; SSE2-NEXT:    pmovmskb %xmm4, %eax
; SSE2-NEXT:    cmpw $-1, %ax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allones_v16i32_sign:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    cmpw $-1, %ax
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allones_v16i32_sign:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    cmpw $-1, %ax
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; SKX-LABEL: allones_v16i32_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovd2m %zmm0, %k0
; SKX-NEXT:    kortestw %k0, %k0
; SKX-NEXT:    setb %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = icmp slt <16 x i32> %arg, zeroinitializer
  %tmp1 = bitcast <16 x i1> %tmp to i16
  %tmp2 = icmp eq i16 %tmp1, -1
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Collects the sign bit of each of the 16 i32 lanes into an i16 mask
; (icmp slt against zero, then bitcast <16 x i1> to i16) and returns true
; only when no mask bit is set (mask == 0).
define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
; SSE2-LABEL: allzeros_v16i32_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    pxor %xmm5, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    packssdw %xmm5, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
; SSE2-NEXT:    packssdw %xmm2, %xmm4
; SSE2-NEXT:    packsswb %xmm3, %xmm4
; SSE2-NEXT:    pmovmskb %xmm4, %eax
; SSE2-NEXT:    testw %ax, %ax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allzeros_v16i32_sign:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    testw %ax, %ax
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allzeros_v16i32_sign:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    testw %ax, %ax
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; SKX-LABEL: allzeros_v16i32_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovd2m %zmm0, %k0
; SKX-NEXT:    kortestw %k0, %k0
; SKX-NEXT:    sete %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = icmp slt <16 x i32> %arg, zeroinitializer
  %tmp1 = bitcast <16 x i1> %tmp to i16
  %tmp2 = icmp eq i16 %tmp1, 0
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Collects the sign bit of each of the 4 i64 lanes into an i4 mask
; (icmp slt against zero, then bitcast <4 x i1> to i4) and returns true
; only when every mask bit is set (mask == -1).
define i1 @allones_v4i64_sign(<4 x i64> %arg) {
; SSE2-LABEL: allones_v4i64_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    packssdw %xmm3, %xmm1
; SSE2-NEXT:    movmskps %xmm1, %eax
; SSE2-NEXT:    cmpb $15, %al
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX-LABEL: allones_v4i64_sign:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %ymm0, %eax
; AVX-NEXT:    cmpb $15, %al
; AVX-NEXT:    sete %al
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; SKX-LABEL: allones_v4i64_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovq2m %ymm0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    andb $15, %al
; SKX-NEXT:    cmpb $15, %al
; SKX-NEXT:    sete %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = icmp slt <4 x i64> %arg, zeroinitializer
  %tmp1 = bitcast <4 x i1> %tmp to i4
  %tmp2 = icmp eq i4 %tmp1, -1
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Collects the sign bit of each of the 4 i64 lanes into an i4 mask
; (icmp slt against zero, then bitcast <4 x i1> to i4) and returns true
; only when no mask bit is set (mask == 0).
define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
; SSE2-LABEL: allzeros_v4i64_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    packssdw %xmm3, %xmm1
; SSE2-NEXT:    movmskps %xmm1, %eax
; SSE2-NEXT:    testb %al, %al
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX-LABEL: allzeros_v4i64_sign:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %ymm0, %eax
; AVX-NEXT:    testb %al, %al
; AVX-NEXT:    sete %al
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; SKX-LABEL: allzeros_v4i64_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovq2m %ymm0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    testb $15, %al
; SKX-NEXT:    sete %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = icmp slt <4 x i64> %arg, zeroinitializer
  %tmp1 = bitcast <4 x i1> %tmp to i4
  %tmp2 = icmp eq i4 %tmp1, 0
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Collects the sign bit of each of the 8 i64 lanes into an i8 mask
; (icmp slt against zero, then bitcast <8 x i1> to i8) and returns true
; only when every mask bit is set (mask == -1).
define i1 @allones_v8i64_sign(<8 x i64> %arg) {
; SSE2-LABEL: allones_v8i64_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm4, %xmm3
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm5
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    movdqa %xmm4, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm7, %xmm2
; SSE2-NEXT:    packssdw %xmm5, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    movdqa %xmm4, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    movdqa %xmm4, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    packssdw %xmm3, %xmm1
; SSE2-NEXT:    packssdw %xmm2, %xmm1
; SSE2-NEXT:    packsswb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpb $-1, %al
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allones_v8i64_sign:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    cmpb $-1, %al
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allones_v8i64_sign:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    cmpb $-1, %al
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; SKX-LABEL: allones_v8i64_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovq2m %zmm0, %k0
; SKX-NEXT:    kortestb %k0, %k0
; SKX-NEXT:    setb %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = icmp slt <8 x i64> %arg, zeroinitializer
  %tmp1 = bitcast <8 x i1> %tmp to i8
  %tmp2 = icmp eq i8 %tmp1, -1
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Collects the sign bit of each of the 8 i64 lanes into an i8 mask
; (icmp slt against zero, then bitcast <8 x i1> to i8) and returns true
; only when no mask bit is set (mask == 0).
define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
; SSE2-LABEL: allzeros_v8i64_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm4, %xmm3
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm5
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    movdqa %xmm4, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm7, %xmm2
; SSE2-NEXT:    packssdw %xmm5, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    movdqa %xmm4, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    movdqa %xmm4, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    packssdw %xmm3, %xmm1
; SSE2-NEXT:    packssdw %xmm2, %xmm1
; SSE2-NEXT:    packsswb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    testb %al, %al
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allzeros_v8i64_sign:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    testb %al, %al
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allzeros_v8i64_sign:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    testb %al, %al
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; SKX-LABEL: allzeros_v8i64_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovq2m %zmm0, %k0
; SKX-NEXT:    kortestb %k0, %k0
; SKX-NEXT:    sete %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = icmp slt <8 x i64> %arg, zeroinitializer
  %tmp1 = bitcast <8 x i1> %tmp to i8
  %tmp2 = icmp eq i8 %tmp1, 0
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Masks each of the 16 i8 lanes with 1, compares the result against zero
; (icmp ne), bitcasts the <16 x i1> result to an i16 mask and returns true
; only when every mask bit is set (mask == -1), i.e. bit 0 is set in all lanes.
define i1 @allones_v16i8_and1(<16 x i8> %arg) {
; SSE2-LABEL: allones_v16i8_and1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psllw $7, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpw $-1, %ax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX-LABEL: allones_v16i8_and1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    cmpw $-1, %ax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; SKX-LABEL: allones_v16i8_and1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmb {{.*}}(%rip), %xmm0, %k0
; SKX-NEXT:    kortestw %k0, %k0
; SKX-NEXT:    setb %al
; SKX-NEXT:    retq
  %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
  %tmp2 = bitcast <16 x i1> %tmp1 to i16
  %tmp3 = icmp eq i16 %tmp2, -1
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; Masks each of the 16 i8 lanes with 1, compares the result against zero
; (icmp ne), bitcasts the <16 x i1> result to an i16 mask and returns true
; only when no mask bit is set (mask == 0), i.e. bit 0 is clear in all lanes.
define i1 @allzeros_v16i8_and1(<16 x i8> %arg) {
; SSE2-LABEL: allzeros_v16i8_and1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psllw $7, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    testw %ax, %ax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX-LABEL: allzeros_v16i8_and1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    testw %ax, %ax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; SKX-LABEL: allzeros_v16i8_and1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmb {{.*}}(%rip), %xmm0, %k0
; SKX-NEXT:    kortestw %k0, %k0
; SKX-NEXT:    sete %al
; SKX-NEXT:    retq
  %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
  %tmp2 = bitcast <16 x i1> %tmp1 to i16
  %tmp3 = icmp eq i16 %tmp2, 0
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; Masks each of the 32 i8 lanes with 1, compares the result against zero
; (icmp ne), bitcasts the <32 x i1> result to an i32 mask and returns true
; only when every mask bit is set (mask == -1), i.e. bit 0 is set in all lanes.
define i1 @allones_v32i8_and1(<32 x i8> %arg) {
; SSE2-LABEL: allones_v32i8_and1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psllw $7, %xmm1
; SSE2-NEXT:    psllw $7, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    shll $16, %ecx
; SSE2-NEXT:    orl %eax, %ecx
; SSE2-NEXT:    cmpl $-1, %ecx
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allones_v32i8_and1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovmskb %xmm2, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    cmpl $-1, %ecx
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allones_v32i8_and1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    cmpl $-1, %eax
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; SKX-LABEL: allones_v32i8_and1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmb {{.*}}(%rip), %ymm0, %k0
; SKX-NEXT:    kortestd %k0, %k0
; SKX-NEXT:    setb %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
  %tmp2 = bitcast <32 x i1> %tmp1 to i32
  %tmp3 = icmp eq i32 %tmp2, -1
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v32i8_and1:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $7, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v32i8_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm2, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v32i8_and1:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testl %eax, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v32i8_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
|
|
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v64i8_and1(<64 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v64i8_and1:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $7, %xmm3
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm2
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm3, %edx
|
|
|
|
; SSE2-NEXT: shll $16, %edx
|
|
|
|
; SSE2-NEXT: orl %eax, %edx
|
|
|
|
; SSE2-NEXT: shlq $32, %rdx
|
|
|
|
; SSE2-NEXT: orq %rcx, %rdx
|
|
|
|
; SSE2-NEXT: cmpq $-1, %rdx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v64i8_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm3, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %edx
|
|
|
|
; AVX1-NEXT: shll $16, %edx
|
|
|
|
; AVX1-NEXT: orl %eax, %edx
|
|
|
|
; AVX1-NEXT: shlq $32, %rdx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
|
|
|
; AVX1-NEXT: cmpq $-1, %rdx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v64i8_and1:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; AVX2-NEXT: shlq $32, %rax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; AVX2-NEXT: orq %rax, %rcx
|
|
|
|
; AVX2-NEXT: cmpq $-1, %rcx
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v64i8_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestq %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
|
|
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <64 x i1> %tmp1 to i64
|
|
|
|
%tmp3 = icmp eq i64 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v64i8_and1:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $7, %xmm3
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm2
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm3, %edx
|
|
|
|
; SSE2-NEXT: shll $16, %edx
|
|
|
|
; SSE2-NEXT: orl %eax, %edx
|
|
|
|
; SSE2-NEXT: shlq $32, %rdx
|
|
|
|
; SSE2-NEXT: orq %rcx, %rdx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v64i8_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm3, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %edx
|
|
|
|
; AVX1-NEXT: shll $16, %edx
|
|
|
|
; AVX1-NEXT: orl %eax, %edx
|
|
|
|
; AVX1-NEXT: shlq $32, %rdx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v64i8_and1:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; AVX2-NEXT: shlq $32, %rax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; AVX2-NEXT: orq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v64i8_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestq %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
|
|
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <64 x i1> %tmp1 to i64
|
|
|
|
%tmp3 = icmp eq i64 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v8i16_and1(<8 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i16_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v8i16_and1:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
|
|
|
|
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $-1, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v8i16_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
|
|
|
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v8i16_and1(<8 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i16_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v8i16_and1:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
|
|
|
|
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v8i16_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
|
|
|
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v16i16_and1(<16 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v16i16_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; SSE2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v16i16_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX1-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v16i16_and1:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v16i16_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
|
|
|
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v32i16_and1(<32 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v32i16_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: cmpl $-1, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v32i16_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: cmpl $-1, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v32i16_and1:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpl $-1, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v32i16_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
|
|
|
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v32i16_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v32i16_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v32i16_and1:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testl %eax, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v32i16_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
|
|
|
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v16i16_and1(<16 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v16i16_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testw %ax, %ax
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v16i16_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: testw %ax, %ax
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v16i16_and1:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX2-NEXT: testw %ax, %ax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v16i16_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
|
|
|
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v4i32_and1(<4 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v4i32_and1:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; SSE2-NEXT: pslld $31, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
|
|
|
; SSE2-NEXT: cmpb $15, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX-LABEL: allones_v4i32_and1:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vmovmskps %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $15, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v4i32_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
|
|
|
|
; SKX-NEXT: vptestmd %xmm1, %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb $15, %al
|
|
|
|
; SKX-NEXT: cmpb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
|
|
|
|
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <4 x i1> %tmp1 to i4
|
|
|
|
%tmp3 = icmp eq i4 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
; Masks bit 0 of each of the four i32 lanes, turns the per-lane "bit is set"
; results into an i4 bitmask, and returns true when that mask is 0, i.e. when
; no lane has bit 0 set.
define i1 @allzeros_v4i32_and1(<4 x i32> %arg) {
; SSE2-LABEL: allzeros_v4i32_and1:
; SSE2: # %bb.0:
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; AVX-LABEL: allzeros_v4i32_and1:
; AVX: # %bb.0:
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
; AVX-NEXT: vmovmskps %xmm0, %eax
; AVX-NEXT: testb %al, %al
; AVX-NEXT: sete %al
; AVX-NEXT: retq
;
; SKX-LABEL: allzeros_v4i32_and1:
; SKX: # %bb.0:
; SKX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; SKX-NEXT: vptestmd %xmm1, %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
  %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
  %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
  %tmp2 = bitcast <4 x i1> %tmp1 to i4
  %tmp3 = icmp eq i4 %tmp2, 0
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; Masks bit 0 of each of the eight i32 lanes, turns the per-lane "bit is set"
; results into an i8 bitmask, and returns true when that mask is -1 (all bits
; set), i.e. when every lane has bit 0 set.
define i1 @allones_v8i32_and1(<8 x i32> %arg) {
; SSE2-LABEL: allones_v8i32_and1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpb $-1, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; AVX1-LABEL: allones_v8i32_and1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: cmpb $-1, %al
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allones_v8i32_and1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: cmpb $-1, %al
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SKX-LABEL: allones_v8i32_and1:
; SKX: # %bb.0:
; SKX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; SKX-NEXT: vptestmd %ymm1, %ymm0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
  %tmp2 = bitcast <8 x i1> %tmp1 to i8
  %tmp3 = icmp eq i8 %tmp2, -1
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; Masks bit 0 of each of the eight i32 lanes, turns the per-lane "bit is set"
; results into an i8 bitmask, and returns true when that mask is 0, i.e. when
; no lane has bit 0 set.
define i1 @allzeros_v8i32_and1(<8 x i32> %arg) {
; SSE2-LABEL: allzeros_v8i32_and1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; AVX1-LABEL: allzeros_v8i32_and1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: testb %al, %al
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allzeros_v8i32_and1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: testb %al, %al
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SKX-LABEL: allzeros_v8i32_and1:
; SKX: # %bb.0:
; SKX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; SKX-NEXT: vptestmd %ymm1, %ymm0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
  %tmp2 = bitcast <8 x i1> %tmp1 to i8
  %tmp3 = icmp eq i8 %tmp2, 0
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; Masks bit 0 of each of the sixteen i32 lanes, turns the per-lane "bit is set"
; results into an i16 bitmask, and returns true when that mask is -1 (all bits
; set), i.e. when every lane has bit 0 set.
define i1 @allones_v16i32_and1(<16 x i32> %arg) {
; SSE2-LABEL: allones_v16i32_and1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1]
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm2, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; AVX1-LABEL: allones_v16i32_and1:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: cmpw $-1, %ax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allones_v16i32_and1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: cmpw $-1, %ax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SKX-LABEL: allones_v16i32_and1:
; SKX: # %bb.0:
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SKX-NEXT: vptestmd %zmm1, %zmm0, %k0
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
  %tmp2 = bitcast <16 x i1> %tmp1 to i16
  %tmp3 = icmp eq i16 %tmp2, -1
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; Masks bit 0 of each of the sixteen i32 lanes, turns the per-lane "bit is set"
; results into an i16 bitmask, and returns true when that mask is 0, i.e. when
; no lane has bit 0 set.
define i1 @allzeros_v16i32_and1(<16 x i32> %arg) {
; SSE2-LABEL: allzeros_v16i32_and1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1]
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm2, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testw %ax, %ax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; AVX1-LABEL: allzeros_v16i32_and1:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testw %ax, %ax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allzeros_v16i32_and1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: testw %ax, %ax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SKX-LABEL: allzeros_v16i32_and1:
; SKX: # %bb.0:
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SKX-NEXT: vptestmd %zmm1, %zmm0, %k0
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
  %tmp2 = bitcast <16 x i1> %tmp1 to i16
  %tmp3 = icmp eq i16 %tmp2, 0
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; Masks bit 0 of each of the two i64 lanes, turns the per-lane "bit is set"
; results into an i2 bitmask, and returns true when that mask is -1 (all bits
; set), i.e. when both lanes have bit 0 set.
define i1 @allones_v2i64_and1(<2 x i64> %arg) {
; SSE2-LABEL: allones_v2i64_and1:
; SSE2: # %bb.0:
; SSE2-NEXT: psllq $63, %xmm0
; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: cmpb $3, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; AVX-LABEL: allones_v2i64_and1:
; AVX: # %bb.0:
; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX-NEXT: vmovmskpd %xmm0, %eax
; AVX-NEXT: cmpb $3, %al
; AVX-NEXT: sete %al
; AVX-NEXT: retq
;
; SKX-LABEL: allones_v2i64_and1:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $3, %al
; SKX-NEXT: cmpb $3, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
  %tmp = and <2 x i64> %arg, <i64 1, i64 1>
  %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
  %tmp2 = bitcast <2 x i1> %tmp1 to i2
  %tmp3 = icmp eq i2 %tmp2, -1
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; Masks bit 0 of each of the two i64 lanes, turns the per-lane "bit is set"
; results into an i2 bitmask, and returns true when that mask is 0, i.e. when
; neither lane has bit 0 set.
define i1 @allzeros_v2i64_and1(<2 x i64> %arg) {
; SSE2-LABEL: allzeros_v2i64_and1:
; SSE2: # %bb.0:
; SSE2-NEXT: psllq $63, %xmm0
; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; AVX-LABEL: allzeros_v2i64_and1:
; AVX: # %bb.0:
; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX-NEXT: vmovmskpd %xmm0, %eax
; AVX-NEXT: testb %al, %al
; AVX-NEXT: sete %al
; AVX-NEXT: retq
;
; SKX-LABEL: allzeros_v2i64_and1:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $3, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
  %tmp = and <2 x i64> %arg, <i64 1, i64 1>
  %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
  %tmp2 = bitcast <2 x i1> %tmp1 to i2
  %tmp3 = icmp eq i2 %tmp2, 0
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; Masks bit 0 of each of the four i64 lanes, turns the per-lane "bit is set"
; results into an i4 bitmask, and returns true when that mask is -1 (all bits
; set), i.e. when every lane has bit 0 set.
define i1 @allones_v4i64_and1(<4 x i64> %arg) {
; SSE2-LABEL: allones_v4i64_and1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: packssdw %xmm3, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: cmpb $15, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; AVX1-LABEL: allones_v4i64_and1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmovmskpd %ymm0, %eax
; AVX1-NEXT: cmpb $15, %al
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allones_v4i64_and1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
; AVX2-NEXT: vmovmskpd %ymm0, %eax
; AVX2-NEXT: cmpb $15, %al
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SKX-LABEL: allones_v4i64_and1:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $15, %al
; SKX-NEXT: cmpb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
  %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
  %tmp2 = bitcast <4 x i1> %tmp1 to i4
  %tmp3 = icmp eq i4 %tmp2, -1
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; Masks bit 0 of each of the four i64 lanes, turns the per-lane "bit is set"
; results into an i4 bitmask, and returns true when that mask is 0, i.e. when
; no lane has bit 0 set.
define i1 @allzeros_v4i64_and1(<4 x i64> %arg) {
; SSE2-LABEL: allzeros_v4i64_and1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: packssdw %xmm3, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; AVX1-LABEL: allzeros_v4i64_and1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmovmskpd %ymm0, %eax
; AVX1-NEXT: testb %al, %al
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allzeros_v4i64_and1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
; AVX2-NEXT: vmovmskpd %ymm0, %eax
; AVX2-NEXT: testb %al, %al
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SKX-LABEL: allzeros_v4i64_and1:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
  %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
  %tmp2 = bitcast <4 x i1> %tmp1 to i4
  %tmp3 = icmp eq i4 %tmp2, 0
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; Masks bit 0 of each of the eight i64 lanes, turns the per-lane "bit is set"
; results into an i8 bitmask, and returns true when that mask is -1 (all bits
; set), i.e. when every lane has bit 0 set.
define i1 @allones_v8i64_and1(<8 x i64> %arg) {
; SSE2-LABEL: allones_v8i64_and1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1]
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
; SSE2-NEXT: pand %xmm3, %xmm5
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: packssdw %xmm5, %xmm3
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: packssdw %xmm2, %xmm1
; SSE2-NEXT: packssdw %xmm3, %xmm1
; SSE2-NEXT: packsswb %xmm0, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %eax
; SSE2-NEXT: cmpb $-1, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; AVX1-LABEL: allones_v8i64_and1:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: cmpb $-1, %al
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allones_v8i64_and1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: cmpb $-1, %al
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SKX-LABEL: allones_v8i64_and1:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
  %tmp2 = bitcast <8 x i1> %tmp1 to i8
  %tmp3 = icmp eq i8 %tmp2, -1
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i64_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: packssdw %xmm5, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v8i64_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: testb %al, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v8i64_and1:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testb %al, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v8i64_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
|
|
|
|
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v16i8_and4(<16 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v16i8_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $5, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; SSE2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v16i8_and4:
|
|
|
|
; AVX: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX-NEXT: vpsllw $5, %xmm0, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v16i8_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
|
|
|
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v16i8_and4(<16 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v16i8_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $5, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testw %ax, %ax
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v16i8_and4:
|
|
|
|
; AVX: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX-NEXT: vpsllw $5, %xmm0, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: testw %ax, %ax
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v16i8_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
|
|
|
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v32i8_and4(<32 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v32i8_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $5, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: cmpl $-1, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v32i8_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm2, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: cmpl $-1, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v32i8_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpl $-1, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v32i8_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
|
|
|
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v32i8_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $5, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v32i8_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm2, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v32i8_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testl %eax, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v32i8_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
|
|
|
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v64i8_and4(<64 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v64i8_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $5, %xmm3
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm2
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm3, %edx
|
|
|
|
; SSE2-NEXT: shll $16, %edx
|
|
|
|
; SSE2-NEXT: orl %eax, %edx
|
|
|
|
; SSE2-NEXT: shlq $32, %rdx
|
|
|
|
; SSE2-NEXT: orq %rcx, %rdx
|
|
|
|
; SSE2-NEXT: cmpq $-1, %rdx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v64i8_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm3, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %edx
|
|
|
|
; AVX1-NEXT: shll $16, %edx
|
|
|
|
; AVX1-NEXT: orl %eax, %edx
|
|
|
|
; AVX1-NEXT: shlq $32, %rdx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
|
|
|
; AVX1-NEXT: cmpq $-1, %rdx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v64i8_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; AVX2-NEXT: shlq $32, %rax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; AVX2-NEXT: orq %rax, %rcx
|
|
|
|
; AVX2-NEXT: cmpq $-1, %rcx
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v64i8_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestq %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
|
|
|
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <64 x i1> %tmp1 to i64
|
|
|
|
%tmp3 = icmp eq i64 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v64i8_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $5, %xmm3
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm2
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm3, %edx
|
|
|
|
; SSE2-NEXT: shll $16, %edx
|
|
|
|
; SSE2-NEXT: orl %eax, %edx
|
|
|
|
; SSE2-NEXT: shlq $32, %rdx
|
|
|
|
; SSE2-NEXT: orq %rcx, %rdx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v64i8_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm3, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %edx
|
|
|
|
; AVX1-NEXT: shll $16, %edx
|
|
|
|
; AVX1-NEXT: orl %eax, %edx
|
|
|
|
; AVX1-NEXT: shlq $32, %rdx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v64i8_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; AVX2-NEXT: shlq $32, %rax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; AVX2-NEXT: orq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v64i8_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestq %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
|
|
|
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <64 x i1> %tmp1 to i64
|
|
|
|
%tmp3 = icmp eq i64 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v8i16_and4(<8 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i16_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v8i16_and4:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $-1, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v8i16_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
|
|
|
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v8i16_and4(<8 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i16_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v8i16_and4:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v8i16_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
|
|
|
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v16i16_and4(<16 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v16i16_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; SSE2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v16i16_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX1-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v16i16_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v16i16_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
|
|
|
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v32i16_and4(<32 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v32i16_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: cmpl $-1, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v32i16_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: cmpl $-1, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v32i16_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpl $-1, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v32i16_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
|
|
|
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v32i16_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v32i16_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v32i16_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testl %eax, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v32i16_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
|
|
|
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v16i16_and4(<16 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v16i16_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testw %ax, %ax
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v16i16_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: testw %ax, %ax
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v16i16_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX2-NEXT: testw %ax, %ax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v16i16_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
|
|
|
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v4i32_and4(<4 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v4i32_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; SSE2-NEXT: pslld $29, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
|
|
|
; SSE2-NEXT: cmpb $15, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX-LABEL: allones_v4i32_and4:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpslld $29, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vmovmskps %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $15, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v4i32_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
|
|
|
|
; SKX-NEXT: vptestmd %xmm1, %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb $15, %al
|
|
|
|
; SKX-NEXT: cmpb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <4 x i1> %tmp1 to i4
|
|
|
|
%tmp3 = icmp eq i4 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v4i32_and4(<4 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v4i32_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; SSE2-NEXT: pslld $29, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX-LABEL: allzeros_v4i32_and4:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpslld $29, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vmovmskps %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v4i32_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
|
|
|
|
; SKX-NEXT: vptestmd %xmm1, %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: testb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <4 x i1> %tmp1 to i4
|
|
|
|
%tmp3 = icmp eq i4 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v8i32_and4(<8 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i32_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v8i32_and4:
|
|
|
|
; AVX1: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX1-NEXT: vpslld $29, %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: cmpb $-1, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v8i32_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpb $-1, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v8i32_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SKX-NEXT: vptestmd %ymm1, %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v8i32_and4(<8 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i32_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v8i32_and4:
|
|
|
|
; AVX1: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX1-NEXT: vpslld $29, %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: testb %al, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v8i32_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testb %al, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v8i32_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SKX-NEXT: vptestmd %ymm1, %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v16i32_and4(<16 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v16i32_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; SSE2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v16i32_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX1-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v16i32_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v16i32_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; SKX-NEXT: vptestmd %zmm1, %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v16i32_and4(<16 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v16i32_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testw %ax, %ax
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v16i32_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: testw %ax, %ax
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v16i32_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX2-NEXT: testw %ax, %ax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v16i32_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; SKX-NEXT: vptestmd %zmm1, %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v2i64_and4(<2 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v2i64_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; SSE2-NEXT: psllq $61, %xmm0
|
|
|
|
; SSE2-NEXT: movmskpd %xmm0, %eax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: cmpb $3, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v2i64_and4:
|
|
|
|
; AVX: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX-NEXT: vpsllq $61, %xmm0, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: vmovmskpd %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $3, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v2i64_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb $3, %al
|
|
|
|
; SKX-NEXT: cmpb $3, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <2 x i64> %arg, <i64 4, i64 4>
|
|
|
|
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <2 x i1> %tmp1 to i2
|
|
|
|
%tmp3 = icmp eq i2 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
; Test: AND every i64 lane of %arg with 4 (bit 2), compare each lane against
; zero with `icmp ne`, bitcast the <2 x i1> result to i2 and return true iff
; that i2 equals 0, i.e. iff no lane has bit 2 set.
; The assertion lines in the body are autogenerated (see the NOTE at the top of
; this file); regenerate them rather than editing them by hand.
define i1 @allzeros_v2i64_and4(<2 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v2i64_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; SSE2-NEXT: psllq $61, %xmm0
|
|
|
|
; SSE2-NEXT: movmskpd %xmm0, %eax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v2i64_and4:
|
|
|
|
; AVX: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX-NEXT: vpsllq $61, %xmm0, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: vmovmskpd %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v2i64_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: testb $3, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <2 x i64> %arg, <i64 4, i64 4>
|
|
|
|
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <2 x i1> %tmp1 to i2
|
|
|
|
%tmp3 = icmp eq i2 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
; Test: AND every i64 lane of %arg with 4 (bit 2), compare each lane against
; zero with `icmp ne`, bitcast the <4 x i1> result to i4 and return true iff
; that i4 equals -1, i.e. iff every one of the four lanes has bit 2 set.
; The assertion lines in the body are autogenerated (see the NOTE at the top of
; this file); regenerate them rather than editing them by hand.
define i1 @allones_v4i64_and4(<4 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v4i64_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: movmskps %xmm1, %eax
|
|
|
|
; SSE2-NEXT: cmpb $15, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v4i64_and4:
|
|
|
|
; AVX1: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX1-NEXT: cmpb $15, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v4i64_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpb $15, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v4i64_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb $15, %al
|
|
|
|
; SKX-NEXT: cmpb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
|
|
|
|
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <4 x i1> %tmp1 to i4
|
|
|
|
%tmp3 = icmp eq i4 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
; Test: AND every i64 lane of %arg with 4 (bit 2), compare each lane against
; zero with `icmp ne`, bitcast the <4 x i1> result to i4 and return true iff
; that i4 equals 0, i.e. iff none of the four lanes has bit 2 set.
; The assertion lines in the body are autogenerated (see the NOTE at the top of
; this file); regenerate them rather than editing them by hand.
define i1 @allzeros_v4i64_and4(<4 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v4i64_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: movmskps %xmm1, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v4i64_and4:
|
|
|
|
; AVX1: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX1-NEXT: testb %al, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v4i64_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testb %al, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v4i64_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: testb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
|
|
|
|
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <4 x i1> %tmp1 to i4
|
|
|
|
%tmp3 = icmp eq i4 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
; Test: AND every i64 lane of %arg with 4 (bit 2), compare each lane against
; zero with `icmp ne`, bitcast the <8 x i1> result to i8 and return true iff
; that i8 equals -1, i.e. iff every one of the eight lanes has bit 2 set.
; The assertion lines in the body are autogenerated (see the NOTE at the top of
; this file); regenerate them rather than editing them by hand.
define i1 @allones_v8i64_and4(<8 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i64_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: packssdw %xmm5, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v8i64_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: cmpb $-1, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v8i64_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpb $-1, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allones_v8i64_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
|
|
|
|
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
; Test: AND every i64 lane of %arg with 4 (bit 2), compare each lane against
; zero with `icmp ne`, bitcast the <8 x i1> result to i8 and return true iff
; that i8 equals 0, i.e. iff none of the eight lanes has bit 2 set.
; The assertion lines in the body are autogenerated (see the NOTE at the top of
; this file); regenerate them rather than editing them by hand.
define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i64_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: packssdw %xmm5, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v8i64_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: testb %al, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v8i64_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testb %al, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: allzeros_v8i64_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
|
|
|
|
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
2018-09-26 07:28:24 +08:00
|
|
|
|
|
|
|
; The below are IR patterns that should directly represent the behavior of a
|
|
|
|
; MOVMSK instruction.
|
|
|
|
|
|
|
|
; Test: reinterpret the two doubles in %x as i64 lanes, mark each lane that is
; signed-less-than zero (`icmp slt`), bitcast the <2 x i1> result to i2 and
; zero-extend it to the i32 return value (one result bit per lane).
; The assertion lines in the body are autogenerated (see the NOTE at the top of
; this file); regenerate them rather than editing them by hand.
define i32 @movmskpd(<2 x double> %x) {
|
|
|
|
; SSE2-LABEL: movmskpd:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-26 07:28:27 +08:00
|
|
|
; SSE2-NEXT: movmskpd %xmm0, %eax
|
2018-09-26 07:28:24 +08:00
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: movmskpd:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskpd %xmm0, %eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: movmskpd:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovq2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andl $3, %eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%a = bitcast <2 x double> %x to <2 x i64>
|
|
|
|
%b = icmp slt <2 x i64> %a, zeroinitializer
|
|
|
|
%c = bitcast <2 x i1> %b to i2
|
|
|
|
%d = zext i2 %c to i32
|
|
|
|
ret i32 %d
|
|
|
|
}
|
|
|
|
|
|
|
|
; Test: reinterpret the four floats in %x as i32 lanes, mark each lane that is
; signed-less-than zero (`icmp slt`), bitcast the <4 x i1> result to i4 and
; zero-extend it to the i32 return value (one result bit per lane).
; The assertion lines in the body are autogenerated (see the NOTE at the top of
; this file); regenerate them rather than editing them by hand.
define i32 @movmskps(<4 x float> %x) {
|
|
|
|
; SSE2-LABEL: movmskps:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-26 07:28:27 +08:00
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
2018-09-26 07:28:24 +08:00
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: movmskps:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskps %xmm0, %eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: movmskps:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andl $15, %eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%a = bitcast <4 x float> %x to <4 x i32>
|
|
|
|
%b = icmp slt <4 x i32> %a, zeroinitializer
|
|
|
|
%c = bitcast <4 x i1> %b to i4
|
|
|
|
%d = zext i4 %c to i32
|
|
|
|
ret i32 %d
|
|
|
|
}
|
|
|
|
|
|
|
|
; Test: reinterpret the four doubles in %x as i64 lanes, mark each lane that is
; signed-less-than zero (`icmp slt`), bitcast the <4 x i1> result to i4 and
; zero-extend it to the i32 return value (one result bit per lane).
; The assertion lines in the body are autogenerated (see the NOTE at the top of
; this file); regenerate them rather than editing them by hand.
define i32 @movmskpd256(<4 x double> %x) {
|
|
|
|
; SSE2-LABEL: movmskpd256:
|
|
|
|
; SSE2: # %bb.0:
|
2018-10-10 03:05:50 +08:00
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
|
2018-09-26 07:28:24 +08:00
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
|
|
|
|
; SSE2-NEXT: por %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: movdqa %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
|
|
|
; SSE2-NEXT: por %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: movmskps %xmm1, %eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-26 07:28:27 +08:00
|
|
|
; AVX-LABEL: movmskpd256:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: retq
|
2018-09-26 07:28:24 +08:00
|
|
|
;
|
|
|
|
; SKX-LABEL: movmskpd256:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovq2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andl $15, %eax
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%a = bitcast <4 x double> %x to <4 x i64>
|
|
|
|
%b = icmp slt <4 x i64> %a, zeroinitializer
|
|
|
|
%c = bitcast <4 x i1> %b to i4
|
|
|
|
%d = zext i4 %c to i32
|
|
|
|
ret i32 %d
|
|
|
|
}
|
|
|
|
|
|
|
|
; Test: reinterpret the eight floats in %x as i32 lanes, mark each lane that is
; signed-less-than zero (`icmp slt`), bitcast the <8 x i1> result to i8 and
; zero-extend it to the i32 return value (one result bit per lane).
; The assertion lines in the body are autogenerated (see the NOTE at the top of
; this file); regenerate them rather than editing them by hand.
define i32 @movmskps256(<8 x float> %x) {
|
|
|
|
; SSE2-LABEL: movmskps256:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm2
|
|
|
|
; SSE2-NEXT: pxor %xmm3, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: movzbl %al, %eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-26 07:28:27 +08:00
|
|
|
; AVX-LABEL: movmskps256:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: retq
|
2018-09-26 07:28:24 +08:00
|
|
|
;
|
|
|
|
; SKX-LABEL: movmskps256:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovb %k0, %eax
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%a = bitcast <8 x float> %x to <8 x i32>
|
|
|
|
%b = icmp slt <8 x i32> %a, zeroinitializer
|
|
|
|
%c = bitcast <8 x i1> %b to i8
|
|
|
|
%d = zext i8 %c to i32
|
|
|
|
ret i32 %d
|
|
|
|
}
|
|
|
|
|
|
|
|
; Test: mark each of the sixteen i8 lanes of %x that is signed-less-than zero
; (`icmp slt`), bitcast the <16 x i1> result to i16 and zero-extend it to the
; i32 return value (one result bit per lane).
; The assertion lines in the body are autogenerated (see the NOTE at the top of
; this file); regenerate them rather than editing them by hand.
define i32 @movmskb(<16 x i8> %x) {
|
|
|
|
; SSE2-LABEL: movmskb:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: movmskb:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: movmskb:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovb2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovw %k0, %eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%a = icmp slt <16 x i8> %x, zeroinitializer
|
|
|
|
%b = bitcast <16 x i1> %a to i16
|
|
|
|
%c = zext i16 %b to i32
|
|
|
|
ret i32 %c
|
|
|
|
}
|
|
|
|
|
|
|
|
; Test: mark each of the thirty-two i8 lanes of %x that is signed-less-than
; zero (`icmp slt`) and bitcast the <32 x i1> result straight to the i32 return
; value (one result bit per lane; no extension is needed at this width).
; The assertion lines in the body are autogenerated (see the NOTE at the top of
; this file); regenerate them rather than editing them by hand.
define i32 @movmskb256(<32 x i8> %x) {
|
|
|
|
; SSE2-LABEL: movmskb256:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %ecx
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: shll $16, %eax
|
|
|
|
; SSE2-NEXT: orl %ecx, %eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: movmskb256:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm2, %ecx
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: shll $16, %eax
|
|
|
|
; AVX1-NEXT: orl %ecx, %eax
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: movmskb256:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: movmskb256:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovb2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%a = icmp slt <32 x i8> %x, zeroinitializer
|
|
|
|
%b = bitcast <32 x i1> %a to i32
|
|
|
|
ret i32 %b
|
|
|
|
}
|