; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=SKX

; Test cases inspired by PR38840. They cover sequences that extract the same
; bit from every vector element, bitcast the resulting i1 vector to a scalar
; integer, and compare that integer against all-ones (-1) or all-zeros (0).
|
|
|
|
|
|
|
|
; Returns true when every lane of the <16 x i8> argument is negative: the
; per-lane `slt 0` mask is bitcast to i16 and compared against -1.
define i1 @allones_v16i8_sign(<16 x i8> %arg) {
; SSE2-LABEL: allones_v16i8_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpw $-1, %ax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX-LABEL: allones_v16i8_sign:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    cmpw $-1, %ax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; KNL-LABEL: allones_v16i8_sign:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovmskb %xmm0, %eax
; KNL-NEXT:    cmpw $-1, %ax
; KNL-NEXT:    sete %al
; KNL-NEXT:    retq
;
; SKX-LABEL: allones_v16i8_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovb2m %xmm0, %k0
; SKX-NEXT:    kortestw %k0, %k0
; SKX-NEXT:    setb %al
; SKX-NEXT:    retq
  %tmp = icmp slt <16 x i8> %arg, zeroinitializer
  %tmp1 = bitcast <16 x i1> %tmp to i16
  %tmp2 = icmp eq i16 %tmp1, -1
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Returns true when no lane of the <16 x i8> argument is negative: the
; per-lane `slt 0` mask is bitcast to i16 and compared against 0.
define i1 @allzeros_v16i8_sign(<16 x i8> %arg) {
; SSE2-LABEL: allzeros_v16i8_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    testw %ax, %ax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX-LABEL: allzeros_v16i8_sign:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    testw %ax, %ax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; KNL-LABEL: allzeros_v16i8_sign:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovmskb %xmm0, %eax
; KNL-NEXT:    testw %ax, %ax
; KNL-NEXT:    sete %al
; KNL-NEXT:    retq
;
; SKX-LABEL: allzeros_v16i8_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovb2m %xmm0, %k0
; SKX-NEXT:    kortestw %k0, %k0
; SKX-NEXT:    sete %al
; SKX-NEXT:    retq
  %tmp = icmp slt <16 x i8> %arg, zeroinitializer
  %tmp1 = bitcast <16 x i1> %tmp to i16
  %tmp2 = icmp eq i16 %tmp1, 0
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Returns true when every lane of the <32 x i8> argument is negative: the
; per-lane `slt 0` mask is bitcast to i32 and compared against -1.
define i1 @allones_v32i8_sign(<32 x i8> %arg) {
; SSE2-LABEL: allones_v32i8_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    shll $16, %ecx
; SSE2-NEXT:    orl %eax, %ecx
; SSE2-NEXT:    cmpl $-1, %ecx
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allones_v32i8_sign:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    cmpl $-1, %ecx
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allones_v32i8_sign:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    cmpl $-1, %eax
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; KNL-LABEL: allones_v32i8_sign:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovmskb %ymm0, %eax
; KNL-NEXT:    cmpl $-1, %eax
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allones_v32i8_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovb2m %ymm0, %k0
; SKX-NEXT:    kortestd %k0, %k0
; SKX-NEXT:    setb %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = icmp slt <32 x i8> %arg, zeroinitializer
  %tmp1 = bitcast <32 x i1> %tmp to i32
  %tmp2 = icmp eq i32 %tmp1, -1
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Returns true when no lane of the <32 x i8> argument is negative: the
; per-lane `slt 0` mask is bitcast to i32 and compared against 0.
define i1 @allzeros_v32i8_sign(<32 x i8> %arg) {
; SSE2-LABEL: allzeros_v32i8_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    shll $16, %ecx
; SSE2-NEXT:    orl %eax, %ecx
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allzeros_v32i8_sign:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allzeros_v32i8_sign:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    testl %eax, %eax
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; KNL-LABEL: allzeros_v32i8_sign:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovmskb %ymm0, %eax
; KNL-NEXT:    testl %eax, %eax
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allzeros_v32i8_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovb2m %ymm0, %k0
; SKX-NEXT:    kortestd %k0, %k0
; SKX-NEXT:    sete %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = icmp slt <32 x i8> %arg, zeroinitializer
  %tmp1 = bitcast <32 x i1> %tmp to i32
  %tmp2 = icmp eq i32 %tmp1, 0
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Returns true when every lane of the <64 x i8> argument is negative: the
; per-lane `slt 0` mask is bitcast to i64 and compared against -1.
define i1 @allones_v64i8_sign(<64 x i8> %arg) {
; SSE2-LABEL: allones_v64i8_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    shll $16, %ecx
; SSE2-NEXT:    orl %eax, %ecx
; SSE2-NEXT:    pmovmskb %xmm2, %eax
; SSE2-NEXT:    pmovmskb %xmm3, %edx
; SSE2-NEXT:    shll $16, %edx
; SSE2-NEXT:    orl %eax, %edx
; SSE2-NEXT:    shlq $32, %rdx
; SSE2-NEXT:    orq %rcx, %rdx
; SSE2-NEXT:    cmpq $-1, %rdx
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allones_v64i8_sign:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %edx
; AVX1-NEXT:    shll $16, %edx
; AVX1-NEXT:    orl %eax, %edx
; AVX1-NEXT:    shlq $32, %rdx
; AVX1-NEXT:    orq %rcx, %rdx
; AVX1-NEXT:    cmpq $-1, %rdx
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allones_v64i8_sign:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm1, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    orq %rax, %rcx
; AVX2-NEXT:    cmpq $-1, %rcx
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; KNL-LABEL: allones_v64i8_sign:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovmskb %ymm1, %eax
; KNL-NEXT:    shlq $32, %rax
; KNL-NEXT:    vpmovmskb %ymm0, %ecx
; KNL-NEXT:    orq %rax, %rcx
; KNL-NEXT:    cmpq $-1, %rcx
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allones_v64i8_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovb2m %zmm0, %k0
; SKX-NEXT:    kortestq %k0, %k0
; SKX-NEXT:    setb %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = icmp slt <64 x i8> %arg, zeroinitializer
  %tmp1 = bitcast <64 x i1> %tmp to i64
  %tmp2 = icmp eq i64 %tmp1, -1
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Returns true when no lane of the <64 x i8> argument is negative: the
; per-lane `slt 0` mask is bitcast to i64 and compared against 0.
define i1 @allzeros_v64i8_sign(<64 x i8> %arg) {
; SSE2-LABEL: allzeros_v64i8_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    shll $16, %ecx
; SSE2-NEXT:    orl %eax, %ecx
; SSE2-NEXT:    pmovmskb %xmm2, %eax
; SSE2-NEXT:    pmovmskb %xmm3, %edx
; SSE2-NEXT:    shll $16, %edx
; SSE2-NEXT:    orl %eax, %edx
; SSE2-NEXT:    shlq $32, %rdx
; SSE2-NEXT:    orq %rcx, %rdx
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allzeros_v64i8_sign:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %edx
; AVX1-NEXT:    shll $16, %edx
; AVX1-NEXT:    orl %eax, %edx
; AVX1-NEXT:    shlq $32, %rdx
; AVX1-NEXT:    orq %rcx, %rdx
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allzeros_v64i8_sign:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm1, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    orq %rax, %rcx
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; KNL-LABEL: allzeros_v64i8_sign:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovmskb %ymm1, %eax
; KNL-NEXT:    shlq $32, %rax
; KNL-NEXT:    vpmovmskb %ymm0, %ecx
; KNL-NEXT:    orq %rax, %rcx
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allzeros_v64i8_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovb2m %zmm0, %k0
; SKX-NEXT:    kortestq %k0, %k0
; SKX-NEXT:    sete %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = icmp slt <64 x i8> %arg, zeroinitializer
  %tmp1 = bitcast <64 x i1> %tmp to i64
  %tmp2 = icmp eq i64 %tmp1, 0
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Returns true when every lane of the <8 x i16> argument is negative: the
; per-lane `slt 0` mask is bitcast to i8 and compared against -1.
define i1 @allones_v8i16_sign(<8 x i16> %arg) {
; SSE2-LABEL: allones_v8i16_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtw %xmm0, %xmm1
; SSE2-NEXT:    packsswb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpb $-1, %al
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX-LABEL: allones_v8i16_sign:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    cmpb $-1, %al
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; KNL-LABEL: allones_v8i16_sign:
; KNL:       # %bb.0:
; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    cmpb $-1, %al
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allones_v8i16_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    kortestb %k0, %k0
; SKX-NEXT:    setb %al
; SKX-NEXT:    retq
  %tmp = icmp slt <8 x i16> %arg, zeroinitializer
  %tmp1 = bitcast <8 x i1> %tmp to i8
  %tmp2 = icmp eq i8 %tmp1, -1
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Returns true when no lane of the <8 x i16> argument is negative: the
; per-lane `slt 0` mask is bitcast to i8 and compared against 0.
define i1 @allzeros_v8i16_sign(<8 x i16> %arg) {
; SSE2-LABEL: allzeros_v8i16_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtw %xmm0, %xmm1
; SSE2-NEXT:    packsswb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    testb %al, %al
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX-LABEL: allzeros_v8i16_sign:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    testb %al, %al
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; KNL-LABEL: allzeros_v8i16_sign:
; KNL:       # %bb.0:
; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allzeros_v8i16_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    kortestb %k0, %k0
; SKX-NEXT:    sete %al
; SKX-NEXT:    retq
  %tmp = icmp slt <8 x i16> %arg, zeroinitializer
  %tmp1 = bitcast <8 x i1> %tmp to i8
  %tmp2 = icmp eq i8 %tmp1, 0
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Returns true when every lane of the <16 x i16> argument is negative: the
; per-lane `slt 0` mask is bitcast to i16 and compared against -1.
define i1 @allones_v16i16_sign(<16 x i16> %arg) {
; SSE2-LABEL: allones_v16i16_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    pcmpgtw %xmm1, %xmm3
; SSE2-NEXT:    pcmpgtw %xmm0, %xmm2
; SSE2-NEXT:    packsswb %xmm3, %xmm2
; SSE2-NEXT:    pmovmskb %xmm2, %eax
; SSE2-NEXT:    cmpw $-1, %ax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allones_v16i16_sign:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    cmpw $-1, %ax
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allones_v16i16_sign:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    cmpw $-1, %ax
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; KNL-LABEL: allones_v16i16_sign:
; KNL:       # %bb.0:
; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kortestw %k0, %k0
; KNL-NEXT:    setb %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allones_v16i16_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovw2m %ymm0, %k0
; SKX-NEXT:    kortestw %k0, %k0
; SKX-NEXT:    setb %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = icmp slt <16 x i16> %arg, zeroinitializer
  %tmp1 = bitcast <16 x i1> %tmp to i16
  %tmp2 = icmp eq i16 %tmp1, -1
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
; Returns true when no lane of the <16 x i16> argument is negative: the
; per-lane `slt 0` mask is bitcast to i16 and compared against 0.
define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
; SSE2-LABEL: allzeros_v16i16_sign:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    pcmpgtw %xmm1, %xmm3
; SSE2-NEXT:    pcmpgtw %xmm0, %xmm2
; SSE2-NEXT:    packsswb %xmm3, %xmm2
; SSE2-NEXT:    pmovmskb %xmm2, %eax
; SSE2-NEXT:    testw %ax, %ax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allzeros_v16i16_sign:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    testw %ax, %ax
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allzeros_v16i16_sign:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    testw %ax, %ax
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; KNL-LABEL: allzeros_v16i16_sign:
; KNL:       # %bb.0:
; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kortestw %k0, %k0
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allzeros_v16i16_sign:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovw2m %ymm0, %k0
; SKX-NEXT:    kortestw %k0, %k0
; SKX-NEXT:    sete %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = icmp slt <16 x i16> %arg, zeroinitializer
  %tmp1 = bitcast <16 x i1> %tmp to i16
  %tmp2 = icmp eq i16 %tmp1, 0
  ret i1 %tmp2
}
|
|
|
|
|
|
|
|
define i1 @allones_v32i16_sign(<32 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v32i16_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm4
|
|
|
|
; SSE2-NEXT: pxor %xmm5, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm1, %xmm5
|
|
|
|
; SSE2-NEXT: pxor %xmm1, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm5, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: pxor %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm3, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm2, %xmm4
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm4, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: cmpl $-1, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v32i16_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm3, %xmm1
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: cmpl $-1, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v32i16_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
|
|
|
|
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpl $-1, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v32i16_sign:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %ecx
|
|
|
|
; KNL-NEXT: shll $16, %ecx
|
|
|
|
; KNL-NEXT: orl %eax, %ecx
|
|
|
|
; KNL-NEXT: cmpl $-1, %ecx
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v32i16_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovw2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <32 x i16> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <32 x i1> %tmp to i32
|
|
|
|
%tmp2 = icmp eq i32 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v32i16_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm4
|
|
|
|
; SSE2-NEXT: pxor %xmm5, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm1, %xmm5
|
|
|
|
; SSE2-NEXT: pxor %xmm1, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm5, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: pxor %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm3, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm2, %xmm4
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm4
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm4, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v32i16_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm3, %xmm1
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v32i16_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
|
|
|
|
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testl %eax, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v32i16_sign:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %ecx
|
|
|
|
; KNL-NEXT: shll $16, %ecx
|
|
|
|
; KNL-NEXT: orl %eax, %ecx
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v32i16_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovw2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <32 x i16> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <32 x i1> %tmp to i32
|
|
|
|
%tmp2 = icmp eq i32 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v4i32_sign(<4 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v4i32_sign:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-11 16:20:02 +08:00
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: cmpb $15, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v4i32_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskps %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $15, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v4i32_sign:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: andb $15, %al
|
|
|
|
; KNL-NEXT: cmpb $15, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v4i32_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb $15, %al
|
|
|
|
; SKX-NEXT: cmpb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <4 x i32> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <4 x i1> %tmp to i4
|
|
|
|
%tmp2 = icmp eq i4 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v4i32_sign(<4 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v4i32_sign:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-11 16:20:02 +08:00
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v4i32_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskps %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v4i32_sign:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb $15, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v4i32_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: testb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <4 x i32> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <4 x i1> %tmp to i4
|
|
|
|
%tmp2 = icmp eq i4 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v8i32_sign(<8 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i32_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm2
|
|
|
|
; SSE2-NEXT: pxor %xmm3, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-11 16:20:02 +08:00
|
|
|
; AVX-LABEL: allones_v8i32_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $-1, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v8i32_sign:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: cmpb $-1, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v8i32_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <8 x i32> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <8 x i1> %tmp to i8
|
|
|
|
%tmp2 = icmp eq i8 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v8i32_sign(<8 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i32_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm2
|
|
|
|
; SSE2-NEXT: pxor %xmm3, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-11 16:20:02 +08:00
|
|
|
; AVX-LABEL: allzeros_v8i32_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v8i32_sign:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb %al, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v8i32_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <8 x i32> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <8 x i1> %tmp to i8
|
|
|
|
%tmp2 = icmp eq i8 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v16i32_sign(<16 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v16i32_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm4
|
|
|
|
; SSE2-NEXT: pxor %xmm5, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pxor %xmm3, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: packssdw %xmm5, %xmm3
|
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
|
|
|
|
; SSE2-NEXT: packssdw %xmm2, %xmm4
|
|
|
|
; SSE2-NEXT: packsswb %xmm3, %xmm4
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; SSE2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v16i32_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm3, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX1-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v16i32_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v16i32_sign:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kortestw %k0, %k0
|
|
|
|
; KNL-NEXT: setb %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v16i32_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <16 x i32> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <16 x i1> %tmp to i16
|
|
|
|
%tmp2 = icmp eq i16 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v16i32_sign:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm4
|
|
|
|
; SSE2-NEXT: pxor %xmm5, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pxor %xmm3, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: packssdw %xmm5, %xmm3
|
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
|
|
|
|
; SSE2-NEXT: packssdw %xmm2, %xmm4
|
|
|
|
; SSE2-NEXT: packsswb %xmm3, %xmm4
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm4, %eax
|
|
|
|
; SSE2-NEXT: testw %ax, %ax
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v16i32_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm3, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: testw %ax, %ax
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v16i32_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX2-NEXT: testw %ax, %ax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v16i32_sign:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kortestw %k0, %k0
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v16i32_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <16 x i32> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <16 x i1> %tmp to i16
|
|
|
|
%tmp2 = icmp eq i16 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v4i64_sign(<4 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v4i64_sign:
|
|
|
|
; SSE2: # %bb.0:
|
2018-10-10 03:05:50 +08:00
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
2018-12-15 19:36:36 +08:00
|
|
|
; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: movdqa %xmm2, %xmm4
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: por %xmm4, %xmm1
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm0
|
2018-12-15 19:36:36 +08:00
|
|
|
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm0
|
|
|
|
; SSE2-NEXT: por %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: cmpb $15, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-11 16:20:02 +08:00
|
|
|
; AVX-LABEL: allones_v4i64_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $15, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v4i64_sign:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: andb $15, %al
|
|
|
|
; KNL-NEXT: cmpb $15, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v4i64_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovq2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb $15, %al
|
|
|
|
; SKX-NEXT: cmpb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <4 x i64> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <4 x i1> %tmp to i4
|
|
|
|
%tmp2 = icmp eq i4 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v4i64_sign:
|
|
|
|
; SSE2: # %bb.0:
|
2018-10-10 03:05:50 +08:00
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
2018-12-15 19:36:36 +08:00
|
|
|
; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: movdqa %xmm2, %xmm4
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: por %xmm4, %xmm1
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm0
|
2018-12-15 19:36:36 +08:00
|
|
|
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm0
|
|
|
|
; SSE2-NEXT: por %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-11 16:20:02 +08:00
|
|
|
; AVX-LABEL: allzeros_v4i64_sign:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v4i64_sign:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb $15, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v4i64_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovq2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: testb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <4 x i64> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <4 x i1> %tmp to i4
|
|
|
|
%tmp2 = icmp eq i4 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v8i64_sign(<8 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i64_sign:
|
|
|
|
; SSE2: # %bb.0:
|
2018-10-10 03:05:50 +08:00
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: movdqa %xmm4, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
|
|
|
|
; SSE2-NEXT: pand %xmm6, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
|
|
|
|
; SSE2-NEXT: por %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: movdqa %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
|
|
|
|
; SSE2-NEXT: pand %xmm6, %xmm7
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
|
|
|
; SSE2-NEXT: por %xmm7, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm5, %xmm2
|
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: movdqa %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
|
|
|
; SSE2-NEXT: pand %xmm5, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
|
|
|
|
; SSE2-NEXT: por %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: movdqa %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; SSE2-NEXT: pand %xmm5, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
|
|
|
; SSE2-NEXT: por %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v8i64_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: cmpb $-1, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v8i64_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpb $-1, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v8i64_sign:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: cmpb $-1, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v8i64_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovq2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <8 x i64> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <8 x i1> %tmp to i8
|
|
|
|
%tmp2 = icmp eq i8 %tmp1, -1
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i64_sign:
|
|
|
|
; SSE2: # %bb.0:
|
2018-10-10 03:05:50 +08:00
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: movdqa %xmm4, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
|
|
|
|
; SSE2-NEXT: pand %xmm6, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
|
|
|
|
; SSE2-NEXT: por %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: movdqa %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
|
|
|
|
; SSE2-NEXT: pand %xmm6, %xmm7
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
|
|
|
|
; SSE2-NEXT: por %xmm7, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm5, %xmm2
|
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: movdqa %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
|
|
|
; SSE2-NEXT: pand %xmm5, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
|
|
|
|
; SSE2-NEXT: por %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pxor %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: movdqa %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; SSE2-NEXT: pand %xmm5, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
|
|
|
; SSE2-NEXT: por %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v8i64_sign:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: testb %al, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v8i64_sign:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testb %al, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v8i64_sign:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb %al, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v8i64_sign:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovq2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = icmp slt <8 x i64> %arg, zeroinitializer
|
|
|
|
%tmp1 = bitcast <8 x i1> %tmp to i8
|
|
|
|
%tmp2 = icmp eq i8 %tmp1, 0
|
|
|
|
ret i1 %tmp2
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v16i8_and1(<16 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v16i8_and1:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $7, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; SSE2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v16i8_and1:
|
|
|
|
; AVX: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v16i8_and1:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 05:40:07 +08:00
|
|
|
; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; KNL-NEXT: cmpw $-1, %ax
|
|
|
|
; KNL-NEXT: sete %al
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v16i8_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
|
|
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v16i8_and1(<16 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v16i8_and1:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $7, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testw %ax, %ax
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v16i8_and1:
|
|
|
|
; AVX: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: testw %ax, %ax
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v16i8_and1:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 05:40:07 +08:00
|
|
|
; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; KNL-NEXT: testw %ax, %ax
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v16i8_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
|
|
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v32i8_and1(<32 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v32i8_and1:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $7, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: cmpl $-1, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v32i8_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm2, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: cmpl $-1, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v32i8_and1:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpl $-1, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v32i8_and1:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 05:40:07 +08:00
|
|
|
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; KNL-NEXT: cmpl $-1, %eax
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v32i8_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
|
|
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v32i8_and1:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $7, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v32i8_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm2, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v32i8_and1:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testl %eax, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v32i8_and1:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 05:40:07 +08:00
|
|
|
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; KNL-NEXT: testl %eax, %eax
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v32i8_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
|
|
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v64i8_and1(<64 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v64i8_and1:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $7, %xmm3
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm2
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm3, %edx
|
|
|
|
; SSE2-NEXT: shll $16, %edx
|
|
|
|
; SSE2-NEXT: orl %eax, %edx
|
|
|
|
; SSE2-NEXT: shlq $32, %rdx
|
|
|
|
; SSE2-NEXT: orq %rcx, %rdx
|
|
|
|
; SSE2-NEXT: cmpq $-1, %rdx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v64i8_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm3, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %edx
|
|
|
|
; AVX1-NEXT: shll $16, %edx
|
|
|
|
; AVX1-NEXT: orl %eax, %edx
|
|
|
|
; AVX1-NEXT: shlq $32, %rdx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
|
|
|
; AVX1-NEXT: cmpq $-1, %rdx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v64i8_and1:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; AVX2-NEXT: shlq $32, %rax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; AVX2-NEXT: orq %rax, %rcx
|
|
|
|
; AVX2-NEXT: cmpq $-1, %rcx
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v64i8_and1:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 06:42:58 +08:00
|
|
|
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; KNL-NEXT: shlq $32, %rax
|
|
|
|
; KNL-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; KNL-NEXT: orq %rax, %rcx
|
|
|
|
; KNL-NEXT: cmpq $-1, %rcx
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v64i8_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestq %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
|
|
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <64 x i1> %tmp1 to i64
|
|
|
|
%tmp3 = icmp eq i64 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v64i8_and1:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $7, %xmm3
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm2
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $7, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm3, %edx
|
|
|
|
; SSE2-NEXT: shll $16, %edx
|
|
|
|
; SSE2-NEXT: orl %eax, %edx
|
|
|
|
; SSE2-NEXT: shlq $32, %rdx
|
|
|
|
; SSE2-NEXT: orq %rcx, %rdx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v64i8_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm3, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %edx
|
|
|
|
; AVX1-NEXT: shll $16, %edx
|
|
|
|
; AVX1-NEXT: orl %eax, %edx
|
|
|
|
; AVX1-NEXT: shlq $32, %rdx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v64i8_and1:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; AVX2-NEXT: shlq $32, %rax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; AVX2-NEXT: orq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v64i8_and1:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 06:42:58 +08:00
|
|
|
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; KNL-NEXT: shlq $32, %rax
|
|
|
|
; KNL-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; KNL-NEXT: orq %rax, %rcx
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v64i8_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestq %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
|
|
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <64 x i1> %tmp1 to i64
|
|
|
|
%tmp3 = icmp eq i64 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v8i16_and1(<8 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i16_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v8i16_and1:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
|
|
|
|
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $-1, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v8i16_and1:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
|
|
|
|
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: cmpb $-1, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v8i16_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
|
|
|
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v8i16_and1(<8 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i16_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v8i16_and1:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
|
|
|
|
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v8i16_and1:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
|
|
|
|
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb %al, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v8i16_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
|
|
|
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v16i16_and1(<16 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v16i16_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; SSE2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v16i16_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX1-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v16i16_and1:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v16i16_and1:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kortestw %k0, %k0
|
|
|
|
; KNL-NEXT: setb %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v16i16_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
|
|
|
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v32i16_and1(<32 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v32i16_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: cmpl $-1, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v32i16_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: cmpl $-1, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v32i16_and1:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpl $-1, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v32i16_and1:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
|
|
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %ecx
|
|
|
|
; KNL-NEXT: shll $16, %ecx
|
|
|
|
; KNL-NEXT: orl %eax, %ecx
|
|
|
|
; KNL-NEXT: cmpl $-1, %ecx
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v32i16_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
|
|
|
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
; allzeros_v32i16_and1: masks every i16 lane of %arg with 1, turns the
; "lane != 0" results into a 32-bit mask via bitcast, and returns true when
; that mask is 0, i.e. when no lane has its lowest bit set.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
; SSE2-LABEL: allzeros_v32i16_and1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm4, %xmm3
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pcmpeqw %xmm4, %xmm1
; SSE2-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-NEXT:    packsswb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    pcmpeqw %xmm4, %xmm3
; SSE2-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-NEXT:    packsswb %xmm3, %xmm2
; SSE2-NEXT:    pmovmskb %xmm2, %ecx
; SSE2-NEXT:    shll $16, %ecx
; SSE2-NEXT:    orl %eax, %ecx
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allzeros_v32i16_and1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allzeros_v32i16_and1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    testl %eax, %eax
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; KNL-LABEL: allzeros_v32i16_and1:
; KNL:       # %bb.0:
; KNL-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
; KNL-NEXT:    vpand %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm0
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %ecx
; KNL-NEXT:    shll $16, %ecx
; KNL-NEXT:    orl %eax, %ecx
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allzeros_v32i16_and1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmw {{.*}}(%rip), %zmm0, %k0
; SKX-NEXT:    kortestd %k0, %k0
; SKX-NEXT:    sete %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
  %tmp2 = bitcast <32 x i1> %tmp1 to i32
  %tmp3 = icmp eq i32 %tmp2, 0
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; allzeros_v16i16_and1: masks every i16 lane of %arg with 1, turns the
; "lane != 0" results into a 16-bit mask via bitcast, and returns true when
; that mask is 0, i.e. when no lane has its lowest bit set.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define i1 @allzeros_v16i16_and1(<16 x i16> %arg) {
; SSE2-LABEL: allzeros_v16i16_and1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-NEXT:    packsswb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    testw %ax, %ax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allzeros_v16i16_and1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    testw %ax, %ax
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allzeros_v16i16_and1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    testw %ax, %ax
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; KNL-LABEL: allzeros_v16i16_and1:
; KNL:       # %bb.0:
; KNL-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kortestw %k0, %k0
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allzeros_v16i16_and1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmw {{.*}}(%rip), %ymm0, %k0
; SKX-NEXT:    kortestw %k0, %k0
; SKX-NEXT:    sete %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
  %tmp2 = bitcast <16 x i1> %tmp1 to i16
  %tmp3 = icmp eq i16 %tmp2, 0
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; allones_v4i32_and1: masks every i32 lane of %arg with 1, turns the
; "lane != 0" results into a 4-bit mask via bitcast, and returns true when
; that mask is -1 (all four bits set), i.e. when every lane has its lowest
; bit set.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define i1 @allones_v4i32_and1(<4 x i32> %arg) {
; SSE2-LABEL: allones_v4i32_and1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    cmpb $15, %al
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX-LABEL: allones_v4i32_and1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    cmpb $15, %al
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; KNL-LABEL: allones_v4i32_and1:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; KNL-NEXT:    vptestmd %zmm1, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    andb $15, %al
; KNL-NEXT:    cmpb $15, %al
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allones_v4i32_and1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    andb $15, %al
; SKX-NEXT:    cmpb $15, %al
; SKX-NEXT:    sete %al
; SKX-NEXT:    retq
  %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
  %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
  %tmp2 = bitcast <4 x i1> %tmp1 to i4
  %tmp3 = icmp eq i4 %tmp2, -1
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; allzeros_v4i32_and1: masks every i32 lane of %arg with 1, turns the
; "lane != 0" results into a 4-bit mask via bitcast, and returns true when
; that mask is 0, i.e. when no lane has its lowest bit set.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define i1 @allzeros_v4i32_and1(<4 x i32> %arg) {
; SSE2-LABEL: allzeros_v4i32_and1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    testb %al, %al
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX-LABEL: allzeros_v4i32_and1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    testb %al, %al
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; KNL-LABEL: allzeros_v4i32_and1:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; KNL-NEXT:    vptestmd %zmm1, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb $15, %al
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allzeros_v4i32_and1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    testb $15, %al
; SKX-NEXT:    sete %al
; SKX-NEXT:    retq
  %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
  %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
  %tmp2 = bitcast <4 x i1> %tmp1 to i4
  %tmp3 = icmp eq i4 %tmp2, 0
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; allones_v8i32_and1: masks every i32 lane of %arg with 1, turns the
; "lane != 0" results into an 8-bit mask via bitcast, and returns true when
; that mask is -1 (all eight bits set), i.e. when every lane has its lowest
; bit set.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define i1 @allones_v8i32_and1(<8 x i32> %arg) {
; SSE2-LABEL: allones_v8i32_and1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    packsswb %xmm0, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpb $-1, %al
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allones_v8i32_and1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    cmpb $-1, %al
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allones_v8i32_and1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    cmpb $-1, %al
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; KNL-LABEL: allones_v8i32_and1:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; KNL-NEXT:    vptestmd %zmm1, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    cmpb $-1, %al
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allones_v8i32_and1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
; SKX-NEXT:    kortestb %k0, %k0
; SKX-NEXT:    setb %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
  %tmp2 = bitcast <8 x i1> %tmp1 to i8
  %tmp3 = icmp eq i8 %tmp2, -1
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; allzeros_v8i32_and1: masks every i32 lane of %arg with 1, turns the
; "lane != 0" results into an 8-bit mask via bitcast, and returns true when
; that mask is 0, i.e. when no lane has its lowest bit set.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define i1 @allzeros_v8i32_and1(<8 x i32> %arg) {
; SSE2-LABEL: allzeros_v8i32_and1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    packsswb %xmm0, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    testb %al, %al
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allzeros_v8i32_and1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    testb %al, %al
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allzeros_v8i32_and1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    testb %al, %al
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; KNL-LABEL: allzeros_v8i32_and1:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; KNL-NEXT:    vptestmd %zmm1, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    sete %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allzeros_v8i32_and1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
; SKX-NEXT:    kortestb %k0, %k0
; SKX-NEXT:    sete %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
  %tmp2 = bitcast <8 x i1> %tmp1 to i8
  %tmp3 = icmp eq i8 %tmp2, 0
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
; allones_v16i32_and1: masks every i32 lane of %arg with 1, turns the
; "lane != 0" results into a 16-bit mask via bitcast, and returns true when
; that mask is -1 (all sixteen bits set), i.e. when every lane has its lowest
; bit set.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define i1 @allones_v16i32_and1(<16 x i32> %arg) {
; SSE2-LABEL: allones_v16i32_and1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,1,1,1]
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm4, %xmm3
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-NEXT:    packssdw %xmm3, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    packsswb %xmm2, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpw $-1, %ax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; AVX1-LABEL: allones_v16i32_and1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    cmpw $-1, %ax
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: allones_v16i32_and1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    cmpw $-1, %ax
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; KNL-LABEL: allones_v16i32_and1:
; KNL:       # %bb.0:
; KNL-NEXT:    vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT:    kortestw %k0, %k0
; KNL-NEXT:    setb %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: allones_v16i32_and1:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; SKX-NEXT:    kortestw %k0, %k0
; SKX-NEXT:    setb %al
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
  %tmp2 = bitcast <16 x i1> %tmp1 to i16
  %tmp3 = icmp eq i16 %tmp2, -1
  ret i1 %tmp3
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v16i32_and1(<16 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v16i32_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testw %ax, %ax
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v16i32_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: testw %ax, %ax
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v16i32_and1:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX2-NEXT: testw %ax, %ax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v16i32_and1:
|
|
|
|
; KNL: # %bb.0:
|
2018-10-27 01:21:26 +08:00
|
|
|
; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: kortestw %k0, %k0
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v16i32_and1:
|
|
|
|
; SKX: # %bb.0:
|
2018-10-27 01:21:26 +08:00
|
|
|
; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
|
|
|
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v2i64_and1(<2 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v2i64_and1:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; SSE2-NEXT: psllq $63, %xmm0
|
|
|
|
; SSE2-NEXT: movmskpd %xmm0, %eax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: cmpb $3, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v2i64_and1:
|
|
|
|
; AVX: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: vmovmskpd %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $3, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v2i64_and1:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1]
|
|
|
|
; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: andb $3, %al
|
|
|
|
; KNL-NEXT: cmpb $3, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v2i64_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb $3, %al
|
|
|
|
; SKX-NEXT: cmpb $3, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <2 x i64> %arg, <i64 1, i64 1>
|
|
|
|
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <2 x i1> %tmp1 to i2
|
|
|
|
%tmp3 = icmp eq i2 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v2i64_and1(<2 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v2i64_and1:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; SSE2-NEXT: psllq $63, %xmm0
|
|
|
|
; SSE2-NEXT: movmskpd %xmm0, %eax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v2i64_and1:
|
|
|
|
; AVX: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: vmovmskpd %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v2i64_and1:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1]
|
|
|
|
; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb $3, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v2i64_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: testb $3, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <2 x i64> %arg, <i64 1, i64 1>
|
|
|
|
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <2 x i1> %tmp1 to i2
|
|
|
|
%tmp3 = icmp eq i2 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v4i64_and1(<4 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v4i64_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: movmskps %xmm1, %eax
|
|
|
|
; SSE2-NEXT: cmpb $15, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v4i64_and1:
|
|
|
|
; AVX1: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX1-NEXT: cmpb $15, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v4i64_and1:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpb $15, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v4i64_and1:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
|
|
|
|
; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: andb $15, %al
|
|
|
|
; KNL-NEXT: cmpb $15, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v4i64_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb $15, %al
|
|
|
|
; SKX-NEXT: cmpb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
|
|
|
|
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <4 x i1> %tmp1 to i4
|
|
|
|
%tmp3 = icmp eq i4 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v4i64_and1(<4 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v4i64_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: movmskps %xmm1, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v4i64_and1:
|
|
|
|
; AVX1: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX1-NEXT: testb %al, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v4i64_and1:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testb %al, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v4i64_and1:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
|
|
|
|
; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb $15, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v4i64_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: testb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
|
|
|
|
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <4 x i1> %tmp1 to i4
|
|
|
|
%tmp3 = icmp eq i4 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v8i64_and1(<8 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i64_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: packssdw %xmm5, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v8i64_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: cmpb $-1, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v8i64_and1:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpb $-1, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v8i64_and1:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: cmpb $-1, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v8i64_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
|
|
|
|
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i64_and1:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: packssdw %xmm5, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v8i64_and1:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: testb %al, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v8i64_and1:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testb %al, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v8i64_and1:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb %al, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v8i64_and1:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
|
|
|
|
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v16i8_and4(<16 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v16i8_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $5, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; SSE2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v16i8_and4:
|
|
|
|
; AVX: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX-NEXT: vpsllw $5, %xmm0, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v16i8_and4:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 05:40:07 +08:00
|
|
|
; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; KNL-NEXT: cmpw $-1, %ax
|
|
|
|
; KNL-NEXT: sete %al
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v16i8_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
|
|
|
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v16i8_and4(<16 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v16i8_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $5, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testw %ax, %ax
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v16i8_and4:
|
|
|
|
; AVX: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX-NEXT: vpsllw $5, %xmm0, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: testw %ax, %ax
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v16i8_and4:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 05:40:07 +08:00
|
|
|
; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; KNL-NEXT: testw %ax, %ax
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v16i8_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
|
|
|
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v32i8_and4(<32 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v32i8_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $5, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: cmpl $-1, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v32i8_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm2, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: cmpl $-1, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v32i8_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpl $-1, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v32i8_and4:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 05:40:07 +08:00
|
|
|
; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; KNL-NEXT: cmpl $-1, %eax
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v32i8_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
|
|
|
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v32i8_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $5, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v32i8_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm2, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v32i8_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testl %eax, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v32i8_and4:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 05:40:07 +08:00
|
|
|
; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; KNL-NEXT: testl %eax, %eax
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v32i8_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
|
|
|
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v64i8_and4(<64 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v64i8_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $5, %xmm3
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm2
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm3, %edx
|
|
|
|
; SSE2-NEXT: shll $16, %edx
|
|
|
|
; SSE2-NEXT: orl %eax, %edx
|
|
|
|
; SSE2-NEXT: shlq $32, %rdx
|
|
|
|
; SSE2-NEXT: orq %rcx, %rdx
|
|
|
|
; SSE2-NEXT: cmpq $-1, %rdx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v64i8_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm3, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %edx
|
|
|
|
; AVX1-NEXT: shll $16, %edx
|
|
|
|
; AVX1-NEXT: orl %eax, %edx
|
|
|
|
; AVX1-NEXT: shlq $32, %rdx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
|
|
|
; AVX1-NEXT: cmpq $-1, %rdx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v64i8_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; AVX2-NEXT: shlq $32, %rax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; AVX2-NEXT: orq %rax, %rcx
|
|
|
|
; AVX2-NEXT: cmpq $-1, %rcx
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v64i8_and4:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 06:42:58 +08:00
|
|
|
; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; KNL-NEXT: shlq $32, %rax
|
|
|
|
; KNL-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; KNL-NEXT: orq %rax, %rcx
|
|
|
|
; KNL-NEXT: cmpq $-1, %rcx
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v64i8_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestq %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
|
|
|
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <64 x i1> %tmp1 to i64
|
|
|
|
%tmp3 = icmp eq i64 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v64i8_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; SSE2-NEXT: psllw $5, %xmm3
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm2
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm1
|
|
|
|
; SSE2-NEXT: psllw $5, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm3, %edx
|
|
|
|
; SSE2-NEXT: shll $16, %edx
|
|
|
|
; SSE2-NEXT: orl %eax, %edx
|
|
|
|
; SSE2-NEXT: shlq $32, %rdx
|
|
|
|
; SSE2-NEXT: orq %rcx, %rdx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v64i8_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm3, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %edx
|
|
|
|
; AVX1-NEXT: shll $16, %edx
|
|
|
|
; AVX1-NEXT: orl %eax, %edx
|
|
|
|
; AVX1-NEXT: shlq $32, %rdx
|
|
|
|
; AVX1-NEXT: orq %rcx, %rdx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v64i8_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-22 13:08:38 +08:00
|
|
|
; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; AVX2-NEXT: shlq $32, %rax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; AVX2-NEXT: orq %rax, %rcx
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v64i8_and4:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 06:42:58 +08:00
|
|
|
; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpmovmskb %ymm1, %eax
|
|
|
|
; KNL-NEXT: shlq $32, %rax
|
|
|
|
; KNL-NEXT: vpmovmskb %ymm0, %ecx
|
|
|
|
; KNL-NEXT: orq %rax, %rcx
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v64i8_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestq %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
|
|
|
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <64 x i1> %tmp1 to i64
|
|
|
|
%tmp3 = icmp eq i64 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v8i16_and4(<8 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i16_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v8i16_and4:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $-1, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v8i16_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: cmpb $-1, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v8i16_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
|
|
|
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v8i16_and4(<8 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i16_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v8i16_and4:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v8i16_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb %al, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v8i16_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
|
|
|
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v16i16_and4(<16 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v16i16_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; SSE2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v16i16_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX1-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v16i16_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v16i16_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kortestw %k0, %k0
|
|
|
|
; KNL-NEXT: setb %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v16i16_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
|
|
|
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v32i16_and4(<32 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v32i16_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: cmpl $-1, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v32i16_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: cmpl $-1, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v32i16_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpl $-1, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v32i16_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %ecx
|
|
|
|
; KNL-NEXT: shll $16, %ecx
|
|
|
|
; KNL-NEXT: orl %eax, %ecx
|
|
|
|
; KNL-NEXT: cmpl $-1, %ecx
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v32i16_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
|
|
|
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v32i16_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %ecx
|
|
|
|
; SSE2-NEXT: shll $16, %ecx
|
|
|
|
; SSE2-NEXT: orl %eax, %ecx
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v32i16_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
|
|
|
; AVX1-NEXT: shll $16, %ecx
|
|
|
|
; AVX1-NEXT: orl %eax, %ecx
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v32i16_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testl %eax, %eax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v32i16_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %ecx
|
|
|
|
; KNL-NEXT: shll $16, %ecx
|
|
|
|
; KNL-NEXT: orl %eax, %ecx
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v32i16_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestd %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
|
|
|
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <32 x i1> %tmp1 to i32
|
|
|
|
%tmp3 = icmp eq i32 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v16i16_and4(<16 x i16> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v16i16_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,4,4,4,4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testw %ax, %ax
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v16i16_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: testw %ax, %ax
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v16i16_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX2-NEXT: testw %ax, %ax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v16i16_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
|
|
|
|
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kortestw %k0, %k0
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v16i16_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
|
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
|
|
|
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v4i32_and4(<4 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v4i32_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; SSE2-NEXT: pslld $29, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
|
|
|
; SSE2-NEXT: cmpb $15, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX-LABEL: allones_v4i32_and4:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpslld $29, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vmovmskps %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $15, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v4i32_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: andb $15, %al
|
|
|
|
; KNL-NEXT: cmpb $15, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v4i32_and4:
|
|
|
|
; SKX: # %bb.0:
|
2018-10-27 01:21:26 +08:00
|
|
|
; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb $15, %al
|
|
|
|
; SKX-NEXT: cmpb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <4 x i1> %tmp1 to i4
|
|
|
|
%tmp3 = icmp eq i4 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v4i32_and4(<4 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v4i32_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; SSE2-NEXT: pslld $29, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX-LABEL: allzeros_v4i32_and4:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpslld $29, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vmovmskps %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v4i32_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb $15, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v4i32_and4:
|
|
|
|
; SKX: # %bb.0:
|
2018-10-27 01:21:26 +08:00
|
|
|
; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: testb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <4 x i1> %tmp1 to i4
|
|
|
|
%tmp3 = icmp eq i4 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v8i32_and4(<8 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i32_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v8i32_and4:
|
|
|
|
; AVX1: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX1-NEXT: vpslld $29, %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: cmpb $-1, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v8i32_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpb $-1, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v8i32_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: cmpb $-1, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v8i32_and4:
|
|
|
|
; SKX: # %bb.0:
|
2018-10-27 01:21:26 +08:00
|
|
|
; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v8i32_and4(<8 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i32_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v8i32_and4:
|
|
|
|
; AVX1: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX1-NEXT: vpslld $29, %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: testb %al, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v8i32_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testb %al, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v8i32_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb %al, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v8i32_and4:
|
|
|
|
; SKX: # %bb.0:
|
2018-10-27 01:21:26 +08:00
|
|
|
; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v16i32_and4(<16 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v16i32_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; SSE2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v16i32_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX1-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v16i32_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-06 02:13:36 +08:00
|
|
|
; AVX2-NEXT: cmpw $-1, %ax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v16i32_and4:
|
|
|
|
; KNL: # %bb.0:
|
2018-10-27 01:21:26 +08:00
|
|
|
; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: kortestw %k0, %k0
|
|
|
|
; KNL-NEXT: setb %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v16i32_and4:
|
|
|
|
; SKX: # %bb.0:
|
2018-10-27 01:21:26 +08:00
|
|
|
; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v16i32_and4(<16 x i32> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v16i32_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4,4,4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: packsswb %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: testw %ax, %ax
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v16i32_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: testw %ax, %ax
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v16i32_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX2-NEXT: testw %ax, %ax
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v16i32_and4:
|
|
|
|
; KNL: # %bb.0:
|
2018-10-27 01:21:26 +08:00
|
|
|
; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: kortestw %k0, %k0
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v16i32_and4:
|
|
|
|
; SKX: # %bb.0:
|
2018-10-27 01:21:26 +08:00
|
|
|
; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-NEXT: kortestw %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <16 x i1> %tmp1 to i16
|
|
|
|
%tmp3 = icmp eq i16 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v2i64_and4(<2 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v2i64_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; SSE2-NEXT: psllq $61, %xmm0
|
|
|
|
; SSE2-NEXT: movmskpd %xmm0, %eax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: cmpb $3, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allones_v2i64_and4:
|
|
|
|
; AVX: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX-NEXT: vpsllq $61, %xmm0, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: vmovmskpd %xmm0, %eax
|
|
|
|
; AVX-NEXT: cmpb $3, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v2i64_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
|
|
|
|
; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: andb $3, %al
|
|
|
|
; KNL-NEXT: cmpb $3, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v2i64_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb $3, %al
|
|
|
|
; SKX-NEXT: cmpb $3, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <2 x i64> %arg, <i64 4, i64 4>
|
|
|
|
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <2 x i1> %tmp1 to i2
|
|
|
|
%tmp3 = icmp eq i2 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v2i64_and4(<2 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v2i64_and4:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; SSE2-NEXT: psllq $61, %xmm0
|
|
|
|
; SSE2-NEXT: movmskpd %xmm0, %eax
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: allzeros_v2i64_and4:
|
|
|
|
; AVX: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX-NEXT: vpsllq $61, %xmm0, %xmm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX-NEXT: vmovmskpd %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb %al, %al
|
|
|
|
; AVX-NEXT: sete %al
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v2i64_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
|
|
|
|
; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb $3, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v2i64_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: testb $3, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <2 x i64> %arg, <i64 4, i64 4>
|
|
|
|
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <2 x i1> %tmp1 to i2
|
|
|
|
%tmp3 = icmp eq i2 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allones_v4i64_and4(<4 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v4i64_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: movmskps %xmm1, %eax
|
|
|
|
; SSE2-NEXT: cmpb $15, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v4i64_and4:
|
|
|
|
; AVX1: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX1-NEXT: cmpb $15, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v4i64_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpb $15, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v4i64_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
|
|
|
|
; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: andb $15, %al
|
|
|
|
; KNL-NEXT: cmpb $15, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v4i64_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb $15, %al
|
|
|
|
; SKX-NEXT: cmpb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
|
|
|
|
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <4 x i1> %tmp1 to i4
|
|
|
|
%tmp3 = icmp eq i4 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
define i1 @allzeros_v4i64_and4(<4 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v4i64_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: movmskps %xmm1, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v4i64_and4:
|
|
|
|
; AVX1: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX1-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX1-NEXT: testb %al, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v4i64_and4:
|
|
|
|
; AVX2: # %bb.0:
|
2018-09-16 00:23:33 +08:00
|
|
|
; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testb %al, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v4i64_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4]
|
|
|
|
; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb $15, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v4i64_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: testb $15, %al
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
|
|
|
|
%tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <4 x i1> %tmp1 to i4
|
|
|
|
%tmp3 = icmp eq i4 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
; Masks every i64 lane of %arg with the constant 4, converts each lane to an i1
; that is true when the masked value is non-zero, packs the eight i1 results
; into an i8, and returns true only when that i8 equals -1 (every lane had the
; masked bit set).
define i1 @allones_v8i64_and4(<8 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allones_v8i64_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: packssdw %xmm5, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: cmpb $-1, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allones_v8i64_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: cmpb $-1, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allones_v8i64_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: cmpb $-1, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allones_v8i64_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: cmpb $-1, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allones_v8i64_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: setb %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
|
|
|
|
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, -1
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
|
|
|
|
|
|
|
; Masks every i64 lane of %arg with the constant 4, converts each lane to an i1
; that is true when the masked value is non-zero, packs the eight i1 results
; into an i8, and returns true only when that i8 equals 0 (no lane had the
; masked bit set).
define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
|
|
|
|
; SSE2-LABEL: allzeros_v8i64_and4:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4,4]
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pand %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm5
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: packssdw %xmm5, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm1, %xmm2
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: testb %al, %al
|
|
|
|
; SSE2-NEXT: sete %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: allzeros_v8i64_and4:
|
|
|
|
; AVX1: # %bb.0:
|
|
|
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,4,4]
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
|
|
|
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX1-NEXT: testb %al, %al
|
|
|
|
; AVX1-NEXT: sete %al
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: allzeros_v8i64_and4:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4,4,4,4]
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
|
|
|
|
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
|
|
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX2-NEXT: testb %al, %al
|
|
|
|
; AVX2-NEXT: sete %al
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: allzeros_v8i64_and4:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: testb %al, %al
|
|
|
|
; KNL-NEXT: sete %al
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
[X86] Add test cases inspired by PR38840.
These are test cases inspired by sequences like below for extracting the same bit from every vector element and checking for all zeros/ones.
define i1 @and256_x8(<8 x i32>) {
%a = trunc <8 x i32> %0 to <8 x i1>
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
This is what the above looks like after InstCombine.
define i1 @and256_x8_opt(<8 x i32>) {
%2 = and <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%a = icmp ne <8 x i32> %2, zeroinitializer
%b = bitcast <8 x i1> %a to i8
%d = icmp eq i8 %b, -1
ret i1 %d
}
llvm-svn: 341908
2018-09-11 15:23:29 +08:00
|
|
|
; SKX-LABEL: allzeros_v8i64_and4:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
|
|
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
|
|
; SKX-NEXT: sete %al
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
|
|
|
|
%tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
|
|
|
|
%tmp2 = bitcast <8 x i1> %tmp1 to i8
|
|
|
|
%tmp3 = icmp eq i8 %tmp2, 0
|
|
|
|
ret i1 %tmp3
|
|
|
|
}
|
2018-09-26 07:28:24 +08:00
|
|
|
|
|
|
|
; The below are IR patterns that should directly represent the behavior of a
|
|
|
|
; MOVMSK instruction.
|
|
|
|
|
|
|
|
; Reinterprets the two double lanes of %x as i64, tests each lane for being
; negative as a signed integer (icmp slt 0), packs the two i1 results into an
; i2 and zero-extends that to the i32 return value.
define i32 @movmskpd(<2 x double> %x) {
|
|
|
|
; SSE2-LABEL: movmskpd:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-26 07:28:27 +08:00
|
|
|
; SSE2-NEXT: movmskpd %xmm0, %eax
|
2018-09-26 07:28:24 +08:00
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: movmskpd:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskpd %xmm0, %eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: movmskpd:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: andl $3, %eax
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
2018-09-26 07:28:24 +08:00
|
|
|
; SKX-LABEL: movmskpd:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovq2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andl $3, %eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%a = bitcast <2 x double> %x to <2 x i64>
|
|
|
|
%b = icmp slt <2 x i64> %a, zeroinitializer
|
|
|
|
%c = bitcast <2 x i1> %b to i2
|
|
|
|
%d = zext i2 %c to i32
|
|
|
|
ret i32 %d
|
|
|
|
}
|
|
|
|
|
|
|
|
; Reinterprets the four float lanes of %x as i32, tests each lane for being
; negative as a signed integer (icmp slt 0), packs the four i1 results into an
; i4 and zero-extends that to the i32 return value.
define i32 @movmskps(<4 x float> %x) {
|
|
|
|
; SSE2-LABEL: movmskps:
|
|
|
|
; SSE2: # %bb.0:
|
2018-09-26 07:28:27 +08:00
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
2018-09-26 07:28:24 +08:00
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: movmskps:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskps %xmm0, %eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: movmskps:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: andl $15, %eax
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
2018-09-26 07:28:24 +08:00
|
|
|
; SKX-LABEL: movmskps:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andl $15, %eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%a = bitcast <4 x float> %x to <4 x i32>
|
|
|
|
%b = icmp slt <4 x i32> %a, zeroinitializer
|
|
|
|
%c = bitcast <4 x i1> %b to i4
|
|
|
|
%d = zext i4 %c to i32
|
|
|
|
ret i32 %d
|
|
|
|
}
|
|
|
|
|
|
|
|
; 256-bit variant of the double-lane pattern: reinterprets the four double
; lanes of %x as i64, tests each lane for being negative as a signed integer
; (icmp slt 0), packs the four i1 results into an i4 and zero-extends that to
; the i32 return value.
define i32 @movmskpd256(<4 x double> %x) {
|
|
|
|
; SSE2-LABEL: movmskpd256:
|
|
|
|
; SSE2: # %bb.0:
|
2018-10-10 03:05:50 +08:00
|
|
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
|
2018-09-26 07:28:24 +08:00
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
|
|
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
2018-12-15 19:36:36 +08:00
|
|
|
; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: movdqa %xmm2, %xmm4
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm1
|
|
|
|
; SSE2-NEXT: por %xmm4, %xmm1
|
2018-09-26 07:28:24 +08:00
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm0
|
2018-12-15 19:36:36 +08:00
|
|
|
; SSE2-NEXT: movdqa %xmm2, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
|
|
|
|
; SSE2-NEXT: pand %xmm3, %xmm0
|
|
|
|
; SSE2-NEXT: por %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: movmskps %xmm0, %eax
|
2018-09-26 07:28:24 +08:00
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-26 07:28:27 +08:00
|
|
|
; AVX-LABEL: movmskpd256:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskpd %ymm0, %eax
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: retq
|
2018-09-26 07:28:24 +08:00
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: movmskpd256:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: andl $15, %eax
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
2018-09-26 07:28:24 +08:00
|
|
|
; SKX-LABEL: movmskpd256:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovq2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andl $15, %eax
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%a = bitcast <4 x double> %x to <4 x i64>
|
|
|
|
%b = icmp slt <4 x i64> %a, zeroinitializer
|
|
|
|
%c = bitcast <4 x i1> %b to i4
|
|
|
|
%d = zext i4 %c to i32
|
|
|
|
ret i32 %d
|
|
|
|
}
|
|
|
|
|
|
|
|
; 256-bit variant of the float-lane pattern: reinterprets the eight float lanes
; of %x as i32, tests each lane for being negative as a signed integer
; (icmp slt 0), packs the eight i1 results into an i8 and zero-extends that to
; the i32 return value.
define i32 @movmskps256(<8 x float> %x) {
|
|
|
|
; SSE2-LABEL: movmskps256:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pxor %xmm2, %xmm2
|
|
|
|
; SSE2-NEXT: pxor %xmm3, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
|
|
|
; SSE2-NEXT: packsswb %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm2, %eax
|
|
|
|
; SSE2-NEXT: movzbl %al, %eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
2018-09-26 07:28:27 +08:00
|
|
|
; AVX-LABEL: movmskps256:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vmovmskps %ymm0, %eax
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: retq
|
2018-09-26 07:28:24 +08:00
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: movmskps256:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
2018-09-26 07:28:24 +08:00
|
|
|
; SKX-LABEL: movmskps256:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovd2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovb %k0, %eax
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%a = bitcast <8 x float> %x to <8 x i32>
|
|
|
|
%b = icmp slt <8 x i32> %a, zeroinitializer
|
|
|
|
%c = bitcast <8 x i1> %b to i8
|
|
|
|
%d = zext i8 %c to i32
|
|
|
|
ret i32 %d
|
|
|
|
}
|
|
|
|
|
|
|
|
; Tests each of the sixteen i8 lanes of %x for being negative (icmp slt 0),
; packs the sixteen i1 results into an i16 and zero-extends that to the i32
; return value.
define i32 @movmskb(<16 x i8> %x) {
|
|
|
|
; SSE2-LABEL: movmskb:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: movmskb:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: movmskb:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 05:40:07 +08:00
|
|
|
; KNL-NEXT: vpmovmskb %xmm0, %eax
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
2018-09-26 07:28:24 +08:00
|
|
|
; SKX-LABEL: movmskb:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovb2m %xmm0, %k0
|
|
|
|
; SKX-NEXT: kmovw %k0, %eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%a = icmp slt <16 x i8> %x, zeroinitializer
|
|
|
|
%b = bitcast <16 x i1> %a to i16
|
|
|
|
%c = zext i16 %b to i32
|
|
|
|
ret i32 %c
|
|
|
|
}
|
|
|
|
|
|
|
|
; Tests each of the thirty-two i8 lanes of %x for being negative (icmp slt 0)
; and returns the thirty-two i1 results bitcast directly to an i32 (no
; extension is needed because the mask is already 32 bits wide).
define i32 @movmskb256(<32 x i8> %x) {
|
|
|
|
; SSE2-LABEL: movmskb256:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm0, %ecx
|
|
|
|
; SSE2-NEXT: pmovmskb %xmm1, %eax
|
|
|
|
; SSE2-NEXT: shll $16, %eax
|
|
|
|
; SSE2-NEXT: orl %ecx, %eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: movmskb256:
|
|
|
|
; AVX1: # %bb.0:
|
2019-02-04 23:43:36 +08:00
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
2018-09-26 07:28:24 +08:00
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
|
|
; AVX1-NEXT: shll $16, %eax
|
|
|
|
; AVX1-NEXT: orl %ecx, %eax
|
|
|
|
; AVX1-NEXT: vzeroupper
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: movmskb256:
|
|
|
|
; AVX2: # %bb.0:
|
|
|
|
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
|
|
|
; AVX2-NEXT: vzeroupper
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-LABEL: movmskb256:
|
|
|
|
; KNL: # %bb.0:
|
2019-01-06 05:40:07 +08:00
|
|
|
; KNL-NEXT: vpmovmskb %ymm0, %eax
|
2018-10-26 02:06:25 +08:00
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
2018-09-26 07:28:24 +08:00
|
|
|
; SKX-LABEL: movmskb256:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpmovb2m %ymm0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%a = icmp slt <32 x i8> %x, zeroinitializer
|
|
|
|
%b = bitcast <32 x i1> %a to i32
|
|
|
|
ret i32 %b
|
|
|
|
}
|
2019-03-22 01:57:56 +08:00
|
|
|
|
|
|
|
; Multiple extract elements from a vector compare.
|
|
|
|
|
|
|
|
; Compares the sixteen i8 lanes of %x and %y for equality, extracts the i1
; results of lanes 3, 8 and 15, and returns lane15 AND (lane3 XOR lane8).
define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) {
|
|
|
|
; SSE2-LABEL: movmsk_v16i8:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
|
|
|
|
; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
|
|
|
|
; SSE2-NEXT: xorb -{{[0-9]+}}(%rsp), %al
|
|
|
|
; SSE2-NEXT: andb -{{[0-9]+}}(%rsp), %al
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: movmsk_v16i8:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpextrb $3, %xmm0, %eax
|
|
|
|
; AVX-NEXT: vpextrb $8, %xmm0, %ecx
|
|
|
|
; AVX-NEXT: xorl %eax, %ecx
|
|
|
|
; AVX-NEXT: vpextrb $15, %xmm0, %eax
|
|
|
|
; AVX-NEXT: andl %ecx, %eax
|
|
|
|
; AVX-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; KNL-LABEL: movmsk_v16i8:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kshiftrw $15, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %k1, %ecx
|
|
|
|
; KNL-NEXT: kshiftrw $8, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %k1, %edx
|
|
|
|
; KNL-NEXT: kshiftrw $3, %k0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: xorb %dl, %al
|
|
|
|
; KNL-NEXT: andb %cl, %al
|
|
|
|
; KNL-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: movmsk_v16i8:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
|
|
|
|
; SKX-NEXT: kshiftrw $15, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %k1, %ecx
|
|
|
|
; SKX-NEXT: kshiftrw $8, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %k1, %edx
|
|
|
|
; SKX-NEXT: kshiftrw $3, %k0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: xorb %dl, %al
|
|
|
|
; SKX-NEXT: andb %cl, %al
|
|
|
|
; SKX-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%cmp = icmp eq <16 x i8> %x, %y
|
|
|
|
%e1 = extractelement <16 x i1> %cmp, i32 3
|
|
|
|
%e2 = extractelement <16 x i1> %cmp, i32 8
|
|
|
|
%e3 = extractelement <16 x i1> %cmp, i32 15
|
|
|
|
%u1 = xor i1 %e1, %e2
|
|
|
|
%u2 = and i1 %e3, %u1
|
|
|
|
ret i1 %u2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Compares the eight i16 lanes of %x and %y with signed greater-than, extracts
; the i1 results of lanes 0, 1, 7 and 4, and returns the AND of all four.
define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
|
|
|
|
; SSE2-LABEL: movmsk_v8i16:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pcmpgtw %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: movd %xmm0, %ecx
|
|
|
|
; SSE2-NEXT: pextrw $1, %xmm0, %edx
|
|
|
|
; SSE2-NEXT: pextrw $7, %xmm0, %esi
|
|
|
|
; SSE2-NEXT: pextrw $4, %xmm0, %eax
|
|
|
|
; SSE2-NEXT: andl %esi, %eax
|
|
|
|
; SSE2-NEXT: andl %edx, %eax
|
|
|
|
; SSE2-NEXT: andl %ecx, %eax
|
|
|
|
; SSE2-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: movmsk_v8i16:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vmovd %xmm0, %ecx
|
|
|
|
; AVX-NEXT: vpextrw $1, %xmm0, %edx
|
|
|
|
; AVX-NEXT: vpextrw $7, %xmm0, %esi
|
|
|
|
; AVX-NEXT: vpextrw $4, %xmm0, %eax
|
|
|
|
; AVX-NEXT: andl %esi, %eax
|
|
|
|
; AVX-NEXT: andl %edx, %eax
|
|
|
|
; AVX-NEXT: andl %ecx, %eax
|
|
|
|
; AVX-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; KNL-LABEL: movmsk_v8i16:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kshiftrw $4, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %k1, %ecx
|
|
|
|
; KNL-NEXT: kshiftrw $7, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %k1, %eax
|
|
|
|
; KNL-NEXT: kshiftrw $1, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %k1, %edx
|
|
|
|
; KNL-NEXT: kmovw %k0, %esi
|
|
|
|
; KNL-NEXT: andb %cl, %al
|
|
|
|
; KNL-NEXT: andb %dl, %al
|
|
|
|
; KNL-NEXT: andb %sil, %al
|
|
|
|
; KNL-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: movmsk_v8i16:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
|
|
|
|
; SKX-NEXT: kshiftrb $4, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %k1, %ecx
|
|
|
|
; SKX-NEXT: kshiftrb $7, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %k1, %eax
|
|
|
|
; SKX-NEXT: kshiftrb $1, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %k1, %edx
|
|
|
|
; SKX-NEXT: kmovd %k0, %esi
|
|
|
|
; SKX-NEXT: andb %cl, %al
|
|
|
|
; SKX-NEXT: andb %dl, %al
|
|
|
|
; SKX-NEXT: andb %sil, %al
|
|
|
|
; SKX-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%cmp = icmp sgt <8 x i16> %x, %y
|
|
|
|
%e1 = extractelement <8 x i1> %cmp, i32 0
|
|
|
|
%e2 = extractelement <8 x i1> %cmp, i32 1
|
|
|
|
%e3 = extractelement <8 x i1> %cmp, i32 7
|
|
|
|
%e4 = extractelement <8 x i1> %cmp, i32 4
|
|
|
|
%u1 = and i1 %e1, %e2
|
|
|
|
%u2 = and i1 %e3, %e4
|
|
|
|
%u3 = and i1 %u1, %u2
|
|
|
|
ret i1 %u3
|
|
|
|
}
|
|
|
|
|
|
|
|
; Compares the four i32 lanes of %x and %y with signed less-than, extracts the
; i1 results of lanes 2 and 3, and returns their XOR.
define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) {
|
|
|
|
; SSE2-LABEL: movmsk_v4i32:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
|
|
|
; SSE2-NEXT: movd %xmm0, %ecx
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
|
|
|
|
; SSE2-NEXT: movd %xmm0, %eax
|
|
|
|
; SSE2-NEXT: xorl %ecx, %eax
|
|
|
|
; SSE2-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: movmsk_v4i32:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX-NEXT: vpextrd $2, %xmm0, %ecx
|
|
|
|
; AVX-NEXT: vpextrd $3, %xmm0, %eax
|
|
|
|
; AVX-NEXT: xorl %ecx, %eax
|
|
|
|
; AVX-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; KNL-LABEL: movmsk_v4i32:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kshiftrw $3, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %k1, %ecx
|
|
|
|
; KNL-NEXT: kshiftrw $2, %k0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: xorb %cl, %al
|
|
|
|
; KNL-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: movmsk_v4i32:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
|
|
|
|
; SKX-NEXT: kshiftrb $3, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %k1, %ecx
|
|
|
|
; SKX-NEXT: kshiftrb $2, %k0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: xorb %cl, %al
|
|
|
|
; SKX-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%cmp = icmp slt <4 x i32> %x, %y
|
|
|
|
%e1 = extractelement <4 x i1> %cmp, i32 2
|
|
|
|
%e2 = extractelement <4 x i1> %cmp, i32 3
|
|
|
|
%u1 = xor i1 %e1, %e2
|
|
|
|
ret i1 %u1
|
|
|
|
}
|
|
|
|
|
|
|
|
; Compares the two i64 lanes of %x and %y for inequality, extracts the i1
; results of lanes 0 and 1, and returns their AND (true only when both lanes
; differ).
define i1 @movmsk_v2i64(<2 x i64> %x, <2 x i64> %y) {
|
|
|
|
; SSE2-LABEL: movmsk_v2i64:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
|
|
|
|
; SSE2-NEXT: pand %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
|
|
|
|
; SSE2-NEXT: pxor %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: movd %xmm0, %ecx
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
|
|
|
; SSE2-NEXT: movd %xmm0, %eax
|
|
|
|
; SSE2-NEXT: andl %ecx, %eax
|
|
|
|
; SSE2-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: movmsk_v2i64:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
|
|
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpextrd $2, %xmm0, %ecx
|
|
|
|
; AVX-NEXT: vmovd %xmm0, %eax
|
|
|
|
; AVX-NEXT: andl %ecx, %eax
|
|
|
|
; AVX-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; KNL-LABEL: movmsk_v2i64:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vpcmpneqq %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kshiftrw $1, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %k1, %ecx
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: andb %cl, %al
|
|
|
|
; KNL-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: movmsk_v2i64:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
|
|
|
|
; SKX-NEXT: kshiftrb $1, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %k1, %ecx
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb %cl, %al
|
|
|
|
; SKX-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%cmp = icmp ne <2 x i64> %x, %y
|
|
|
|
%e1 = extractelement <2 x i1> %cmp, i32 0
|
|
|
|
%e2 = extractelement <2 x i1> %cmp, i32 1
|
|
|
|
%u1 = and i1 %e1, %e2
|
|
|
|
ret i1 %u1
|
|
|
|
}
|
|
|
|
|
|
|
|
; Compares the four float lanes of %x and %y with the unordered-or-equal
; predicate (fcmp ueq), extracts the i1 results of lanes 1, 2 and 3, and
; returns their OR.
define i1 @movmsk_v4f32(<4 x float> %x, <4 x float> %y) {
|
|
|
|
; SSE2-LABEL: movmsk_v4f32:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: movaps %xmm0, %xmm2
|
|
|
|
; SSE2-NEXT: cmpeqps %xmm1, %xmm2
|
|
|
|
; SSE2-NEXT: cmpunordps %xmm1, %xmm0
|
|
|
|
; SSE2-NEXT: orps %xmm2, %xmm0
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
|
|
|
; SSE2-NEXT: movd %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
|
|
|
; SSE2-NEXT: movd %xmm1, %edx
|
|
|
|
; SSE2-NEXT: pextrw $6, %xmm0, %eax
|
|
|
|
; SSE2-NEXT: orl %edx, %eax
|
|
|
|
; SSE2-NEXT: orl %ecx, %eax
|
|
|
|
; SSE2-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: movmsk_v4f32:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vextractps $1, %xmm0, %ecx
|
|
|
|
; AVX-NEXT: vextractps $2, %xmm0, %edx
|
|
|
|
; AVX-NEXT: vpextrb $12, %xmm0, %eax
|
|
|
|
; AVX-NEXT: orl %edx, %eax
|
|
|
|
; AVX-NEXT: orl %ecx, %eax
|
|
|
|
; AVX-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; KNL-LABEL: movmsk_v4f32:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vcmpeq_uqps %zmm1, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kshiftrw $3, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %k1, %ecx
|
|
|
|
; KNL-NEXT: kshiftrw $2, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %k1, %eax
|
|
|
|
; KNL-NEXT: kshiftrw $1, %k0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, %edx
|
|
|
|
; KNL-NEXT: orb %cl, %al
|
|
|
|
; KNL-NEXT: orb %dl, %al
|
|
|
|
; KNL-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: movmsk_v4f32:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
|
|
|
|
; SKX-NEXT: kshiftrb $3, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %k1, %ecx
|
|
|
|
; SKX-NEXT: kshiftrb $2, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %k1, %eax
|
|
|
|
; SKX-NEXT: kshiftrb $1, %k0, %k0
|
|
|
|
; SKX-NEXT: kmovd %k0, %edx
|
|
|
|
; SKX-NEXT: orb %cl, %al
|
|
|
|
; SKX-NEXT: orb %dl, %al
|
|
|
|
; SKX-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%cmp = fcmp ueq <4 x float> %x, %y
|
|
|
|
%e1 = extractelement <4 x i1> %cmp, i32 1
|
|
|
|
%e2 = extractelement <4 x i1> %cmp, i32 2
|
|
|
|
%e3 = extractelement <4 x i1> %cmp, i32 3
|
|
|
|
%u1 = or i1 %e1, %e2
|
|
|
|
%u2 = or i1 %u1, %e3
|
|
|
|
ret i1 %u2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Compares the two double lanes of %x and %y with the ordered
; greater-than-or-equal predicate (fcmp oge), extracts the i1 results of lanes
; 0 and 1, and returns their AND.
define i1 @movmsk_v2f64(<2 x double> %x, <2 x double> %y) {
|
|
|
|
; SSE2-LABEL: movmsk_v2f64:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: cmplepd %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: movd %xmm1, %ecx
|
|
|
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
|
|
|
; SSE2-NEXT: movd %xmm0, %eax
|
|
|
|
; SSE2-NEXT: andl %ecx, %eax
|
|
|
|
; SSE2-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: movmsk_v2f64:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX-NEXT: vextractps $2, %xmm0, %ecx
|
|
|
|
; AVX-NEXT: vmovd %xmm0, %eax
|
|
|
|
; AVX-NEXT: andl %ecx, %eax
|
|
|
|
; AVX-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; KNL-LABEL: movmsk_v2f64:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kshiftrw $1, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %k1, %ecx
|
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
|
|
; KNL-NEXT: andb %cl, %al
|
|
|
|
; KNL-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: movmsk_v2f64:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vcmplepd %xmm0, %xmm1, %k0
|
|
|
|
; SKX-NEXT: kshiftrb $1, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %k1, %ecx
|
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
|
|
; SKX-NEXT: andb %cl, %al
|
|
|
|
; SKX-NEXT: # kill: def $al killed $al killed $eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%cmp = fcmp oge <2 x double> %x, %y
|
|
|
|
%e1 = extractelement <2 x i1> %cmp, i32 0
|
|
|
|
%e2 = extractelement <2 x i1> %cmp, i32 1
|
|
|
|
%u1 = and i1 %e1, %e2
|
|
|
|
ret i1 %u1
|
|
|
|
}
|
|
|
|
|
|
|
|
; Compares the two double lanes of %x and %y with the ordered greater-than
; predicate (fcmp ogt), ANDs the i1 results of lanes 0 and 1, and returns 42
; when both lanes compare true, otherwise 99.
define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
|
|
|
|
; SSE2-LABEL: PR39665_c_ray:
|
|
|
|
; SSE2: # %bb.0:
|
|
|
|
; SSE2-NEXT: cmpltpd %xmm0, %xmm1
|
|
|
|
; SSE2-NEXT: movapd %xmm1, -{{[0-9]+}}(%rsp)
|
|
|
|
; SSE2-NEXT: testb $1, -{{[0-9]+}}(%rsp)
|
|
|
|
; SSE2-NEXT: movl $42, %eax
|
|
|
|
; SSE2-NEXT: movl $99, %ecx
|
|
|
|
; SSE2-NEXT: cmovel %ecx, %eax
|
|
|
|
; SSE2-NEXT: testb $1, -{{[0-9]+}}(%rsp)
|
|
|
|
; SSE2-NEXT: cmovel %ecx, %eax
|
|
|
|
; SSE2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: PR39665_c_ray:
|
|
|
|
; AVX: # %bb.0:
|
|
|
|
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
|
|
|
|
; AVX-NEXT: vpextrb $0, %xmm0, %ecx
|
|
|
|
; AVX-NEXT: vpextrb $8, %xmm0, %eax
|
|
|
|
; AVX-NEXT: testb $1, %al
|
|
|
|
; AVX-NEXT: movl $42, %eax
|
|
|
|
; AVX-NEXT: movl $99, %edx
|
|
|
|
; AVX-NEXT: cmovel %edx, %eax
|
|
|
|
; AVX-NEXT: testb $1, %cl
|
|
|
|
; AVX-NEXT: cmovel %edx, %eax
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
;
|
|
|
|
; KNL-LABEL: PR39665_c_ray:
|
|
|
|
; KNL: # %bb.0:
|
|
|
|
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
|
|
|
|
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
|
|
|
; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kshiftrw $1, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %k1, %eax
|
|
|
|
; KNL-NEXT: kmovw %k0, %ecx
|
|
|
|
; KNL-NEXT: testb $1, %al
|
|
|
|
; KNL-NEXT: movl $42, %eax
|
|
|
|
; KNL-NEXT: movl $99, %edx
|
|
|
|
; KNL-NEXT: cmovel %edx, %eax
|
|
|
|
; KNL-NEXT: testb $1, %cl
|
|
|
|
; KNL-NEXT: cmovel %edx, %eax
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: PR39665_c_ray:
|
|
|
|
; SKX: # %bb.0:
|
|
|
|
; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
|
|
|
|
; SKX-NEXT: kshiftrb $1, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %k1, %eax
|
|
|
|
; SKX-NEXT: kmovd %k0, %ecx
|
|
|
|
; SKX-NEXT: testb $1, %al
|
|
|
|
; SKX-NEXT: movl $42, %eax
|
|
|
|
; SKX-NEXT: movl $99, %edx
|
|
|
|
; SKX-NEXT: cmovel %edx, %eax
|
|
|
|
; SKX-NEXT: testb $1, %cl
|
|
|
|
; SKX-NEXT: cmovel %edx, %eax
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%cmp = fcmp ogt <2 x double> %x, %y
|
|
|
|
%e1 = extractelement <2 x i1> %cmp, i32 0
|
|
|
|
%e2 = extractelement <2 x i1> %cmp, i32 1
|
|
|
|
%u = and i1 %e1, %e2
|
|
|
|
%r = select i1 %u, i32 42, i32 99
|
|
|
|
ret i32 %r
|
|
|
|
}
|