[X86] Enable AVX512BW for memcmp()

llvm-svn: 373845
This commit is contained in:
David Zarzycki 2019-10-06 10:25:52 +00:00
parent c209598268
commit 7653ff398d
3 changed files with 106 additions and 41 deletions

View File

@ -42354,10 +42354,12 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
(OpSize == 256 && Subtarget.hasAVX2()) ||
(OpSize == 512 && Subtarget.useAVX512Regs())) {
EVT VecVT = OpSize == 512 ? MVT::v16i32 :
auto BW = Subtarget.hasBWI();
EVT VecVT = OpSize == 512 ? (BW ? MVT::v64i8 : MVT::v16i32) :
OpSize == 256 ? MVT::v32i8 :
MVT::v16i8;
EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT;
EVT CmpVT = OpSize == 512 ? (BW ? MVT::v64i1 : MVT::v16i1) : VecVT;
SDValue Cmp;
if (IsOrXorXorCCZero) {
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
@ -42377,6 +42379,9 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
}
// For 512-bit operands we want to emit a setcc that will lower to kortest.
if (OpSize == 512 && BW)
return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i64, Cmp),
DAG.getConstant(0xFFFFFFFFFFFFFFFF, DL, MVT::i64), CC);
if (OpSize == 512)
return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp),
DAG.getConstant(0xFFFF, DL, MVT::i16), CC);

View File

@ -6,7 +6,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F --check-prefix=X64-AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512BW
; This tests codegen-time inlining/optimization of memcmp
; rdar://6480398
@ -1551,6 +1551,15 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind {
; X64-AVX512F-NEXT: setae %al
; X64-AVX512F-NEXT: vzeroupper
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: length64_eq:
; X64-AVX512BW: # %bb.0:
; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k0
; X64-AVX512BW-NEXT: kortestq %k0, %k0
; X64-AVX512BW-NEXT: setae %al
; X64-AVX512BW-NEXT: vzeroupper
; X64-AVX512BW-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
%cmp = icmp ne i32 %call, 0
ret i1 %cmp
@ -1612,6 +1621,15 @@ define i1 @length64_eq_const(i8* %X) nounwind {
; X64-AVX512F-NEXT: setb %al
; X64-AVX512F-NEXT: vzeroupper
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: length64_eq_const:
; X64-AVX512BW: # %bb.0:
; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k0
; X64-AVX512BW-NEXT: kortestq %k0, %k0
; X64-AVX512BW-NEXT: setb %al
; X64-AVX512BW-NEXT: vzeroupper
; X64-AVX512BW-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
%c = icmp eq i32 %m, 0
ret i1 %c

View File

@ -319,14 +319,23 @@ define i32 @ne_i512(<8 x i64> %x, <8 x i64> %y) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: ne_i512:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512-NEXT: xorl %eax, %eax
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: setae %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512F-LABEL: ne_i512:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512F-NEXT: xorl %eax, %eax
; AVX512F-NEXT: kortestw %k0, %k0
; AVX512F-NEXT: setae %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: ne_i512:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: xorl %eax, %eax
; AVX512BW-NEXT: kortestq %k0, %k0
; AVX512BW-NEXT: setae %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%bcx = bitcast <8 x i64> %x to i512
%bcy = bitcast <8 x i64> %y to i512
%cmp = icmp ne i512 %bcx, %bcy
@ -464,14 +473,23 @@ define i32 @eq_i512(<8 x i64> %x, <8 x i64> %y) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: eq_i512:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512-NEXT: xorl %eax, %eax
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512F-LABEL: eq_i512:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512F-NEXT: xorl %eax, %eax
; AVX512F-NEXT: kortestw %k0, %k0
; AVX512F-NEXT: setb %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: eq_i512:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: xorl %eax, %eax
; AVX512BW-NEXT: kortestq %k0, %k0
; AVX512BW-NEXT: setb %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%bcx = bitcast <8 x i64> %x to i512
%bcy = bitcast <8 x i64> %y to i512
%cmp = icmp eq i512 %bcx, %bcy
@ -804,17 +822,29 @@ define i32 @ne_i512_pair(i512* %a, i512* %b) {
; NO512-NEXT: setne %al
; NO512-NEXT: retq
;
; AVX512-LABEL: ne_i512_pair:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1
; AVX512-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
; AVX512-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
; AVX512-NEXT: xorl %eax, %eax
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: setae %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512F-LABEL: ne_i512_pair:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
; AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
; AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
; AVX512F-NEXT: xorl %eax, %eax
; AVX512F-NEXT: kortestw %k0, %k0
; AVX512F-NEXT: setae %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: ne_i512_pair:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
; AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1
; AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1}
; AVX512BW-NEXT: xorl %eax, %eax
; AVX512BW-NEXT: kortestq %k0, %k0
; AVX512BW-NEXT: setae %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%a0 = load i512, i512* %a
%b0 = load i512, i512* %b
%xor1 = xor i512 %a0, %b0
@ -886,17 +916,29 @@ define i32 @eq_i512_pair(i512* %a, i512* %b) {
; NO512-NEXT: sete %al
; NO512-NEXT: retq
;
; AVX512-LABEL: eq_i512_pair:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1
; AVX512-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
; AVX512-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
; AVX512-NEXT: xorl %eax, %eax
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512F-LABEL: eq_i512_pair:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
; AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
; AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
; AVX512F-NEXT: xorl %eax, %eax
; AVX512F-NEXT: kortestw %k0, %k0
; AVX512F-NEXT: setb %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: eq_i512_pair:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
; AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1
; AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1}
; AVX512BW-NEXT: xorl %eax, %eax
; AVX512BW-NEXT: kortestq %k0, %k0
; AVX512BW-NEXT: setb %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%a0 = load i512, i512* %a
%b0 = load i512, i512* %b
%xor1 = xor i512 %a0, %b0