forked from OSchip/llvm-project
parent
c209598268
commit
7653ff398d
|
@ -42354,10 +42354,12 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
|
|||
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
|
||||
(OpSize == 256 && Subtarget.hasAVX2()) ||
|
||||
(OpSize == 512 && Subtarget.useAVX512Regs())) {
|
||||
EVT VecVT = OpSize == 512 ? MVT::v16i32 :
|
||||
auto BW = Subtarget.hasBWI();
|
||||
EVT VecVT = OpSize == 512 ? (BW ? MVT::v64i8 : MVT::v16i32) :
|
||||
OpSize == 256 ? MVT::v32i8 :
|
||||
MVT::v16i8;
|
||||
EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT;
|
||||
EVT CmpVT = OpSize == 512 ? (BW ? MVT::v64i1 : MVT::v16i1) : VecVT;
|
||||
|
||||
SDValue Cmp;
|
||||
if (IsOrXorXorCCZero) {
|
||||
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
|
||||
|
@ -42377,6 +42379,9 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
|
|||
Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
|
||||
}
|
||||
// For 512-bits we want to emit a setcc that will lower to kortest.
|
||||
if (OpSize == 512 && BW)
|
||||
return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i64, Cmp),
|
||||
DAG.getConstant(0xFFFFFFFFFFFFFFFF, DL, MVT::i64), CC);
|
||||
if (OpSize == 512)
|
||||
return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp),
|
||||
DAG.getConstant(0xFFFF, DL, MVT::i16), CC);
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F --check-prefix=X64-AVX512BW
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512BW
|
||||
|
||||
; This tests codegen time inlining/optimization of memcmp
|
||||
; rdar://6480398
|
||||
|
@ -1551,6 +1551,15 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind {
|
|||
; X64-AVX512F-NEXT: setae %al
|
||||
; X64-AVX512F-NEXT: vzeroupper
|
||||
; X64-AVX512F-NEXT: retq
|
||||
;
|
||||
; X64-AVX512BW-LABEL: length64_eq:
|
||||
; X64-AVX512BW: # %bb.0:
|
||||
; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
|
||||
; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k0
|
||||
; X64-AVX512BW-NEXT: kortestq %k0, %k0
|
||||
; X64-AVX512BW-NEXT: setae %al
|
||||
; X64-AVX512BW-NEXT: vzeroupper
|
||||
; X64-AVX512BW-NEXT: retq
|
||||
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
|
||||
%cmp = icmp ne i32 %call, 0
|
||||
ret i1 %cmp
|
||||
|
@ -1612,6 +1621,15 @@ define i1 @length64_eq_const(i8* %X) nounwind {
|
|||
; X64-AVX512F-NEXT: setb %al
|
||||
; X64-AVX512F-NEXT: vzeroupper
|
||||
; X64-AVX512F-NEXT: retq
|
||||
;
|
||||
; X64-AVX512BW-LABEL: length64_eq_const:
|
||||
; X64-AVX512BW: # %bb.0:
|
||||
; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
|
||||
; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k0
|
||||
; X64-AVX512BW-NEXT: kortestq %k0, %k0
|
||||
; X64-AVX512BW-NEXT: setb %al
|
||||
; X64-AVX512BW-NEXT: vzeroupper
|
||||
; X64-AVX512BW-NEXT: retq
|
||||
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
|
||||
%c = icmp eq i32 %m, 0
|
||||
ret i1 %c
|
||||
|
|
|
@ -319,14 +319,23 @@ define i32 @ne_i512(<8 x i64> %x, <8 x i64> %y) {
|
|||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: ne_i512:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; AVX512-NEXT: xorl %eax, %eax
|
||||
; AVX512-NEXT: kortestw %k0, %k0
|
||||
; AVX512-NEXT: setae %al
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: ne_i512:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; AVX512F-NEXT: xorl %eax, %eax
|
||||
; AVX512F-NEXT: kortestw %k0, %k0
|
||||
; AVX512F-NEXT: setae %al
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: ne_i512:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: xorl %eax, %eax
|
||||
; AVX512BW-NEXT: kortestq %k0, %k0
|
||||
; AVX512BW-NEXT: setae %al
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
%bcx = bitcast <8 x i64> %x to i512
|
||||
%bcy = bitcast <8 x i64> %y to i512
|
||||
%cmp = icmp ne i512 %bcx, %bcy
|
||||
|
@ -464,14 +473,23 @@ define i32 @eq_i512(<8 x i64> %x, <8 x i64> %y) {
|
|||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: eq_i512:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; AVX512-NEXT: xorl %eax, %eax
|
||||
; AVX512-NEXT: kortestw %k0, %k0
|
||||
; AVX512-NEXT: setb %al
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: eq_i512:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; AVX512F-NEXT: xorl %eax, %eax
|
||||
; AVX512F-NEXT: kortestw %k0, %k0
|
||||
; AVX512F-NEXT: setb %al
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: eq_i512:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: xorl %eax, %eax
|
||||
; AVX512BW-NEXT: kortestq %k0, %k0
|
||||
; AVX512BW-NEXT: setb %al
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
%bcx = bitcast <8 x i64> %x to i512
|
||||
%bcy = bitcast <8 x i64> %y to i512
|
||||
%cmp = icmp eq i512 %bcx, %bcy
|
||||
|
@ -804,17 +822,29 @@ define i32 @ne_i512_pair(i512* %a, i512* %b) {
|
|||
; NO512-NEXT: setne %al
|
||||
; NO512-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: ne_i512_pair:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
|
||||
; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1
|
||||
; AVX512-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
|
||||
; AVX512-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
|
||||
; AVX512-NEXT: xorl %eax, %eax
|
||||
; AVX512-NEXT: kortestw %k0, %k0
|
||||
; AVX512-NEXT: setae %al
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: ne_i512_pair:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
|
||||
; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
|
||||
; AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
|
||||
; AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
|
||||
; AVX512F-NEXT: xorl %eax, %eax
|
||||
; AVX512F-NEXT: kortestw %k0, %k0
|
||||
; AVX512F-NEXT: setae %al
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: ne_i512_pair:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
|
||||
; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
|
||||
; AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1
|
||||
; AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1}
|
||||
; AVX512BW-NEXT: xorl %eax, %eax
|
||||
; AVX512BW-NEXT: kortestq %k0, %k0
|
||||
; AVX512BW-NEXT: setae %al
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
%a0 = load i512, i512* %a
|
||||
%b0 = load i512, i512* %b
|
||||
%xor1 = xor i512 %a0, %b0
|
||||
|
@ -886,17 +916,29 @@ define i32 @eq_i512_pair(i512* %a, i512* %b) {
|
|||
; NO512-NEXT: sete %al
|
||||
; NO512-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: eq_i512_pair:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
|
||||
; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1
|
||||
; AVX512-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
|
||||
; AVX512-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
|
||||
; AVX512-NEXT: xorl %eax, %eax
|
||||
; AVX512-NEXT: kortestw %k0, %k0
|
||||
; AVX512-NEXT: setb %al
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: eq_i512_pair:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
|
||||
; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
|
||||
; AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
|
||||
; AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
|
||||
; AVX512F-NEXT: xorl %eax, %eax
|
||||
; AVX512F-NEXT: kortestw %k0, %k0
|
||||
; AVX512F-NEXT: setb %al
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: eq_i512_pair:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
|
||||
; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
|
||||
; AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1
|
||||
; AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1}
|
||||
; AVX512BW-NEXT: xorl %eax, %eax
|
||||
; AVX512BW-NEXT: kortestq %k0, %k0
|
||||
; AVX512BW-NEXT: setb %al
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
%a0 = load i512, i512* %a
|
||||
%b0 = load i512, i512* %b
|
||||
%xor1 = xor i512 %a0, %b0
|
||||
|
|
Loading…
Reference in New Issue