forked from OSchip/llvm-project
[X86] Add AVX512 support to combineVectorSizedSetCCEquality.
Reviewers: spatel, RKSimon
Reviewed By: spatel
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D52424
llvm-svn: 342989
This commit is contained in:
parent
69ed4710b8
commit
6fb1358a98
|
@ -38653,12 +38653,15 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
|
||||
// TODO: Use PXOR + PTEST for SSE4.1 or later?
|
||||
// TODO: Add support for AVX-512.
|
||||
EVT VT = SetCC->getValueType(0);
|
||||
SDLoc DL(SetCC);
|
||||
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
|
||||
(OpSize == 256 && Subtarget.hasAVX2())) {
|
||||
EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8;
|
||||
(OpSize == 256 && Subtarget.hasAVX2()) ||
|
||||
(OpSize == 512 && Subtarget.useAVX512Regs())) {
|
||||
EVT VecVT = OpSize == 512 ? MVT::v16i32 :
|
||||
OpSize == 256 ? MVT::v32i8 :
|
||||
MVT::v16i8;
|
||||
EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT;
|
||||
SDValue Cmp;
|
||||
if (IsOrXorXorCCZero) {
|
||||
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
|
||||
|
@ -38669,14 +38672,18 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
|
|||
SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1));
|
||||
SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0));
|
||||
SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1));
|
||||
SDValue Cmp1 = DAG.getSetCC(DL, VecVT, A, B, ISD::SETEQ);
|
||||
SDValue Cmp2 = DAG.getSetCC(DL, VecVT, C, D, ISD::SETEQ);
|
||||
Cmp = DAG.getNode(ISD::AND, DL, VecVT, Cmp1, Cmp2);
|
||||
SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
|
||||
SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ);
|
||||
Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2);
|
||||
} else {
|
||||
SDValue VecX = DAG.getBitcast(VecVT, X);
|
||||
SDValue VecY = DAG.getBitcast(VecVT, Y);
|
||||
Cmp = DAG.getSetCC(DL, VecVT, VecX, VecY, ISD::SETEQ);
|
||||
Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
|
||||
}
|
||||
// For 512-bits we want to emit a setcc that will lower to kortest.
|
||||
if (OpSize == 512)
|
||||
return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp),
|
||||
DAG.getConstant(0xFFFF, DL, MVT::i16), CC);
|
||||
// If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
|
||||
// setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
|
||||
// setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=ANY --check-prefix=SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=ANY --check-prefix=AVXANY --check-prefix=AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=ANY --check-prefix=AVXANY --check-prefix=AVX256 --check-prefix=AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=ANY --check-prefix=AVXANY --check-prefix=AVX256 --check-prefix=AVX512F
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ANY --check-prefix=AVXANY --check-prefix=AVX256 --check-prefix=AVX512BW
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=AVXANY --check-prefix=AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=AVXANY --check-prefix=AVX256 --check-prefix=AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=ANY --check-prefix=AVXANY --check-prefix=AVX256 --check-prefix=AVX512 --check-prefix=AVX512F
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ANY --check-prefix=AVXANY --check-prefix=AVX256 --check-prefix=AVX512 --check-prefix=AVX512BW
|
||||
|
||||
; Equality checks of 128/256-bit values can use PMOVMSK or PTEST to avoid scalarization.
|
||||
|
||||
|
@ -319,93 +319,14 @@ define i32 @ne_i512(<8 x i64> %x, <8 x i64> %y) {
|
|||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: ne_i512:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512F-NEXT: vmovq %xmm2, %rdx
|
||||
; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm3
|
||||
; AVX512F-NEXT: vmovq %xmm3, %rsi
|
||||
; AVX512F-NEXT: vmovq %xmm0, %rdi
|
||||
; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm4
|
||||
; AVX512F-NEXT: vmovq %xmm4, %rax
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm2, %r11
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm3, %r10
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm0, %r9
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm4, %r8
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
|
||||
; AVX512F-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX512F-NEXT: xorq %rdx, %rcx
|
||||
; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm2
|
||||
; AVX512F-NEXT: vmovq %xmm2, %rdx
|
||||
; AVX512F-NEXT: xorq %rsi, %rdx
|
||||
; AVX512F-NEXT: orq %rcx, %rdx
|
||||
; AVX512F-NEXT: vmovq %xmm1, %rcx
|
||||
; AVX512F-NEXT: xorq %rdi, %rcx
|
||||
; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3
|
||||
; AVX512F-NEXT: vmovq %xmm3, %rsi
|
||||
; AVX512F-NEXT: xorq %rax, %rsi
|
||||
; AVX512F-NEXT: orq %rdx, %rsi
|
||||
; AVX512F-NEXT: orq %rcx, %rsi
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX512F-NEXT: xorq %r11, %rax
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm2, %rcx
|
||||
; AVX512F-NEXT: xorq %r10, %rcx
|
||||
; AVX512F-NEXT: orq %rax, %rcx
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; AVX512F-NEXT: xorq %r9, %rax
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm3, %rdx
|
||||
; AVX512F-NEXT: xorq %r8, %rdx
|
||||
; AVX512F-NEXT: orq %rcx, %rdx
|
||||
; AVX512F-NEXT: orq %rax, %rdx
|
||||
; AVX512F-NEXT: xorl %eax, %eax
|
||||
; AVX512F-NEXT: orq %rsi, %rdx
|
||||
; AVX512F-NEXT: setne %al
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: ne_i512:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512BW-NEXT: vmovq %xmm2, %rdx
|
||||
; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3
|
||||
; AVX512BW-NEXT: vmovq %xmm3, %rsi
|
||||
; AVX512BW-NEXT: vmovq %xmm0, %rdi
|
||||
; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4
|
||||
; AVX512BW-NEXT: vmovq %xmm4, %rax
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm2, %r11
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm3, %r10
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm0, %r9
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm4, %r8
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm0
|
||||
; AVX512BW-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX512BW-NEXT: xorq %rdx, %rcx
|
||||
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2
|
||||
; AVX512BW-NEXT: vmovq %xmm2, %rdx
|
||||
; AVX512BW-NEXT: xorq %rsi, %rdx
|
||||
; AVX512BW-NEXT: orq %rcx, %rdx
|
||||
; AVX512BW-NEXT: vmovq %xmm1, %rcx
|
||||
; AVX512BW-NEXT: xorq %rdi, %rcx
|
||||
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
|
||||
; AVX512BW-NEXT: vmovq %xmm3, %rsi
|
||||
; AVX512BW-NEXT: xorq %rax, %rsi
|
||||
; AVX512BW-NEXT: orq %rdx, %rsi
|
||||
; AVX512BW-NEXT: orq %rcx, %rsi
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX512BW-NEXT: xorq %r11, %rax
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm2, %rcx
|
||||
; AVX512BW-NEXT: xorq %r10, %rcx
|
||||
; AVX512BW-NEXT: orq %rax, %rcx
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; AVX512BW-NEXT: xorq %r9, %rax
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm3, %rdx
|
||||
; AVX512BW-NEXT: xorq %r8, %rdx
|
||||
; AVX512BW-NEXT: orq %rcx, %rdx
|
||||
; AVX512BW-NEXT: orq %rax, %rdx
|
||||
; AVX512BW-NEXT: xorl %eax, %eax
|
||||
; AVX512BW-NEXT: orq %rsi, %rdx
|
||||
; AVX512BW-NEXT: setne %al
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512-LABEL: ne_i512:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; AVX512-NEXT: xorl %eax, %eax
|
||||
; AVX512-NEXT: kortestw %k0, %k0
|
||||
; AVX512-NEXT: setae %al
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
%bcx = bitcast <8 x i64> %x to i512
|
||||
%bcy = bitcast <8 x i64> %y to i512
|
||||
%cmp = icmp ne i512 %bcx, %bcy
|
||||
|
@ -543,93 +464,14 @@ define i32 @eq_i512(<8 x i64> %x, <8 x i64> %y) {
|
|||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: eq_i512:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512F-NEXT: vmovq %xmm2, %rdx
|
||||
; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm3
|
||||
; AVX512F-NEXT: vmovq %xmm3, %rsi
|
||||
; AVX512F-NEXT: vmovq %xmm0, %rdi
|
||||
; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm4
|
||||
; AVX512F-NEXT: vmovq %xmm4, %rax
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm2, %r11
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm3, %r10
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm0, %r9
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm4, %r8
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
|
||||
; AVX512F-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX512F-NEXT: xorq %rdx, %rcx
|
||||
; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm2
|
||||
; AVX512F-NEXT: vmovq %xmm2, %rdx
|
||||
; AVX512F-NEXT: xorq %rsi, %rdx
|
||||
; AVX512F-NEXT: orq %rcx, %rdx
|
||||
; AVX512F-NEXT: vmovq %xmm1, %rcx
|
||||
; AVX512F-NEXT: xorq %rdi, %rcx
|
||||
; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3
|
||||
; AVX512F-NEXT: vmovq %xmm3, %rsi
|
||||
; AVX512F-NEXT: xorq %rax, %rsi
|
||||
; AVX512F-NEXT: orq %rdx, %rsi
|
||||
; AVX512F-NEXT: orq %rcx, %rsi
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX512F-NEXT: xorq %r11, %rax
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm2, %rcx
|
||||
; AVX512F-NEXT: xorq %r10, %rcx
|
||||
; AVX512F-NEXT: orq %rax, %rcx
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; AVX512F-NEXT: xorq %r9, %rax
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm3, %rdx
|
||||
; AVX512F-NEXT: xorq %r8, %rdx
|
||||
; AVX512F-NEXT: orq %rcx, %rdx
|
||||
; AVX512F-NEXT: orq %rax, %rdx
|
||||
; AVX512F-NEXT: xorl %eax, %eax
|
||||
; AVX512F-NEXT: orq %rsi, %rdx
|
||||
; AVX512F-NEXT: sete %al
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: eq_i512:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512BW-NEXT: vmovq %xmm2, %rdx
|
||||
; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3
|
||||
; AVX512BW-NEXT: vmovq %xmm3, %rsi
|
||||
; AVX512BW-NEXT: vmovq %xmm0, %rdi
|
||||
; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4
|
||||
; AVX512BW-NEXT: vmovq %xmm4, %rax
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm2, %r11
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm3, %r10
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm0, %r9
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm4, %r8
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm0
|
||||
; AVX512BW-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX512BW-NEXT: xorq %rdx, %rcx
|
||||
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2
|
||||
; AVX512BW-NEXT: vmovq %xmm2, %rdx
|
||||
; AVX512BW-NEXT: xorq %rsi, %rdx
|
||||
; AVX512BW-NEXT: orq %rcx, %rdx
|
||||
; AVX512BW-NEXT: vmovq %xmm1, %rcx
|
||||
; AVX512BW-NEXT: xorq %rdi, %rcx
|
||||
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
|
||||
; AVX512BW-NEXT: vmovq %xmm3, %rsi
|
||||
; AVX512BW-NEXT: xorq %rax, %rsi
|
||||
; AVX512BW-NEXT: orq %rdx, %rsi
|
||||
; AVX512BW-NEXT: orq %rcx, %rsi
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX512BW-NEXT: xorq %r11, %rax
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm2, %rcx
|
||||
; AVX512BW-NEXT: xorq %r10, %rcx
|
||||
; AVX512BW-NEXT: orq %rax, %rcx
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; AVX512BW-NEXT: xorq %r9, %rax
|
||||
; AVX512BW-NEXT: vpextrq $1, %xmm3, %rdx
|
||||
; AVX512BW-NEXT: xorq %r8, %rdx
|
||||
; AVX512BW-NEXT: orq %rcx, %rdx
|
||||
; AVX512BW-NEXT: orq %rax, %rdx
|
||||
; AVX512BW-NEXT: xorl %eax, %eax
|
||||
; AVX512BW-NEXT: orq %rsi, %rdx
|
||||
; AVX512BW-NEXT: sete %al
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512-LABEL: eq_i512:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
||||
; AVX512-NEXT: xorl %eax, %eax
|
||||
; AVX512-NEXT: kortestw %k0, %k0
|
||||
; AVX512-NEXT: setb %al
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
%bcx = bitcast <8 x i64> %x to i512
|
||||
%bcy = bitcast <8 x i64> %y to i512
|
||||
%cmp = icmp eq i512 %bcx, %bcy
|
||||
|
@ -909,58 +751,70 @@ define i32 @eq_i256_pair(i256* %a, i256* %b) {
|
|||
; if we allowed 2 pairs of 64-byte loads per block.
|
||||
|
||||
define i32 @ne_i512_pair(i512* %a, i512* %b) {
|
||||
; ANY-LABEL: ne_i512_pair:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: movq 32(%rdi), %r8
|
||||
; ANY-NEXT: movq 48(%rdi), %r9
|
||||
; ANY-NEXT: movq 40(%rdi), %rdx
|
||||
; ANY-NEXT: movq 56(%rdi), %rcx
|
||||
; ANY-NEXT: xorq 56(%rsi), %rcx
|
||||
; ANY-NEXT: movq 120(%rdi), %rax
|
||||
; ANY-NEXT: xorq 120(%rsi), %rax
|
||||
; ANY-NEXT: orq %rcx, %rax
|
||||
; ANY-NEXT: movq 88(%rdi), %rcx
|
||||
; ANY-NEXT: xorq 88(%rsi), %rcx
|
||||
; ANY-NEXT: orq %rcx, %rax
|
||||
; ANY-NEXT: movq 24(%rdi), %rcx
|
||||
; ANY-NEXT: xorq 24(%rsi), %rcx
|
||||
; ANY-NEXT: xorq 40(%rsi), %rdx
|
||||
; ANY-NEXT: orq %rcx, %rax
|
||||
; ANY-NEXT: movq 104(%rdi), %rcx
|
||||
; ANY-NEXT: xorq 104(%rsi), %rcx
|
||||
; ANY-NEXT: orq %rdx, %rcx
|
||||
; ANY-NEXT: movq 72(%rdi), %rdx
|
||||
; ANY-NEXT: xorq 72(%rsi), %rdx
|
||||
; ANY-NEXT: orq %rdx, %rcx
|
||||
; ANY-NEXT: movq 16(%rdi), %r10
|
||||
; ANY-NEXT: orq %rax, %rcx
|
||||
; ANY-NEXT: movq 8(%rdi), %rax
|
||||
; ANY-NEXT: xorq 8(%rsi), %rax
|
||||
; ANY-NEXT: xorq 48(%rsi), %r9
|
||||
; ANY-NEXT: orq %rax, %rcx
|
||||
; ANY-NEXT: movq 112(%rdi), %rax
|
||||
; ANY-NEXT: xorq 112(%rsi), %rax
|
||||
; ANY-NEXT: orq %r9, %rax
|
||||
; ANY-NEXT: movq 80(%rdi), %rdx
|
||||
; ANY-NEXT: xorq 80(%rsi), %rdx
|
||||
; ANY-NEXT: orq %rdx, %rax
|
||||
; ANY-NEXT: movq (%rdi), %r9
|
||||
; ANY-NEXT: xorq 16(%rsi), %r10
|
||||
; ANY-NEXT: xorq (%rsi), %r9
|
||||
; ANY-NEXT: xorq 32(%rsi), %r8
|
||||
; ANY-NEXT: orq %r10, %rax
|
||||
; ANY-NEXT: movq 96(%rdi), %rdx
|
||||
; ANY-NEXT: movq 64(%rdi), %rdi
|
||||
; ANY-NEXT: xorq 64(%rsi), %rdi
|
||||
; ANY-NEXT: xorq 96(%rsi), %rdx
|
||||
; ANY-NEXT: orq %r8, %rdx
|
||||
; ANY-NEXT: orq %rdi, %rdx
|
||||
; ANY-NEXT: orq %rax, %rdx
|
||||
; ANY-NEXT: orq %r9, %rdx
|
||||
; ANY-NEXT: xorl %eax, %eax
|
||||
; ANY-NEXT: orq %rcx, %rdx
|
||||
; ANY-NEXT: setne %al
|
||||
; ANY-NEXT: retq
|
||||
; NO512-LABEL: ne_i512_pair:
|
||||
; NO512: # %bb.0:
|
||||
; NO512-NEXT: movq 32(%rdi), %r8
|
||||
; NO512-NEXT: movq 48(%rdi), %r9
|
||||
; NO512-NEXT: movq 40(%rdi), %rdx
|
||||
; NO512-NEXT: movq 56(%rdi), %rcx
|
||||
; NO512-NEXT: xorq 56(%rsi), %rcx
|
||||
; NO512-NEXT: movq 120(%rdi), %rax
|
||||
; NO512-NEXT: xorq 120(%rsi), %rax
|
||||
; NO512-NEXT: orq %rcx, %rax
|
||||
; NO512-NEXT: movq 88(%rdi), %rcx
|
||||
; NO512-NEXT: xorq 88(%rsi), %rcx
|
||||
; NO512-NEXT: orq %rcx, %rax
|
||||
; NO512-NEXT: movq 24(%rdi), %rcx
|
||||
; NO512-NEXT: xorq 24(%rsi), %rcx
|
||||
; NO512-NEXT: xorq 40(%rsi), %rdx
|
||||
; NO512-NEXT: orq %rcx, %rax
|
||||
; NO512-NEXT: movq 104(%rdi), %rcx
|
||||
; NO512-NEXT: xorq 104(%rsi), %rcx
|
||||
; NO512-NEXT: orq %rdx, %rcx
|
||||
; NO512-NEXT: movq 72(%rdi), %rdx
|
||||
; NO512-NEXT: xorq 72(%rsi), %rdx
|
||||
; NO512-NEXT: orq %rdx, %rcx
|
||||
; NO512-NEXT: movq 16(%rdi), %r10
|
||||
; NO512-NEXT: orq %rax, %rcx
|
||||
; NO512-NEXT: movq 8(%rdi), %rax
|
||||
; NO512-NEXT: xorq 8(%rsi), %rax
|
||||
; NO512-NEXT: xorq 48(%rsi), %r9
|
||||
; NO512-NEXT: orq %rax, %rcx
|
||||
; NO512-NEXT: movq 112(%rdi), %rax
|
||||
; NO512-NEXT: xorq 112(%rsi), %rax
|
||||
; NO512-NEXT: orq %r9, %rax
|
||||
; NO512-NEXT: movq 80(%rdi), %rdx
|
||||
; NO512-NEXT: xorq 80(%rsi), %rdx
|
||||
; NO512-NEXT: orq %rdx, %rax
|
||||
; NO512-NEXT: movq (%rdi), %r9
|
||||
; NO512-NEXT: xorq 16(%rsi), %r10
|
||||
; NO512-NEXT: xorq (%rsi), %r9
|
||||
; NO512-NEXT: xorq 32(%rsi), %r8
|
||||
; NO512-NEXT: orq %r10, %rax
|
||||
; NO512-NEXT: movq 96(%rdi), %rdx
|
||||
; NO512-NEXT: movq 64(%rdi), %rdi
|
||||
; NO512-NEXT: xorq 64(%rsi), %rdi
|
||||
; NO512-NEXT: xorq 96(%rsi), %rdx
|
||||
; NO512-NEXT: orq %r8, %rdx
|
||||
; NO512-NEXT: orq %rdi, %rdx
|
||||
; NO512-NEXT: orq %rax, %rdx
|
||||
; NO512-NEXT: orq %r9, %rdx
|
||||
; NO512-NEXT: xorl %eax, %eax
|
||||
; NO512-NEXT: orq %rcx, %rdx
|
||||
; NO512-NEXT: setne %al
|
||||
; NO512-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: ne_i512_pair:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
|
||||
; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1
|
||||
; AVX512-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
|
||||
; AVX512-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
|
||||
; AVX512-NEXT: xorl %eax, %eax
|
||||
; AVX512-NEXT: kortestw %k0, %k0
|
||||
; AVX512-NEXT: setae %al
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
%a0 = load i512, i512* %a
|
||||
%b0 = load i512, i512* %b
|
||||
%xor1 = xor i512 %a0, %b0
|
||||
|
@ -979,58 +833,70 @@ define i32 @ne_i512_pair(i512* %a, i512* %b) {
|
|||
; if we allowed 2 pairs of 64-byte loads per block.
|
||||
|
||||
define i32 @eq_i512_pair(i512* %a, i512* %b) {
|
||||
; ANY-LABEL: eq_i512_pair:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: movq 32(%rdi), %r8
|
||||
; ANY-NEXT: movq 48(%rdi), %r9
|
||||
; ANY-NEXT: movq 40(%rdi), %rdx
|
||||
; ANY-NEXT: movq 56(%rdi), %rcx
|
||||
; ANY-NEXT: xorq 56(%rsi), %rcx
|
||||
; ANY-NEXT: movq 120(%rdi), %rax
|
||||
; ANY-NEXT: xorq 120(%rsi), %rax
|
||||
; ANY-NEXT: orq %rcx, %rax
|
||||
; ANY-NEXT: movq 88(%rdi), %rcx
|
||||
; ANY-NEXT: xorq 88(%rsi), %rcx
|
||||
; ANY-NEXT: orq %rcx, %rax
|
||||
; ANY-NEXT: movq 24(%rdi), %rcx
|
||||
; ANY-NEXT: xorq 24(%rsi), %rcx
|
||||
; ANY-NEXT: xorq 40(%rsi), %rdx
|
||||
; ANY-NEXT: orq %rcx, %rax
|
||||
; ANY-NEXT: movq 104(%rdi), %rcx
|
||||
; ANY-NEXT: xorq 104(%rsi), %rcx
|
||||
; ANY-NEXT: orq %rdx, %rcx
|
||||
; ANY-NEXT: movq 72(%rdi), %rdx
|
||||
; ANY-NEXT: xorq 72(%rsi), %rdx
|
||||
; ANY-NEXT: orq %rdx, %rcx
|
||||
; ANY-NEXT: movq 16(%rdi), %r10
|
||||
; ANY-NEXT: orq %rax, %rcx
|
||||
; ANY-NEXT: movq 8(%rdi), %rax
|
||||
; ANY-NEXT: xorq 8(%rsi), %rax
|
||||
; ANY-NEXT: xorq 48(%rsi), %r9
|
||||
; ANY-NEXT: orq %rax, %rcx
|
||||
; ANY-NEXT: movq 112(%rdi), %rax
|
||||
; ANY-NEXT: xorq 112(%rsi), %rax
|
||||
; ANY-NEXT: orq %r9, %rax
|
||||
; ANY-NEXT: movq 80(%rdi), %rdx
|
||||
; ANY-NEXT: xorq 80(%rsi), %rdx
|
||||
; ANY-NEXT: orq %rdx, %rax
|
||||
; ANY-NEXT: movq (%rdi), %r9
|
||||
; ANY-NEXT: xorq 16(%rsi), %r10
|
||||
; ANY-NEXT: xorq (%rsi), %r9
|
||||
; ANY-NEXT: xorq 32(%rsi), %r8
|
||||
; ANY-NEXT: orq %r10, %rax
|
||||
; ANY-NEXT: movq 96(%rdi), %rdx
|
||||
; ANY-NEXT: movq 64(%rdi), %rdi
|
||||
; ANY-NEXT: xorq 64(%rsi), %rdi
|
||||
; ANY-NEXT: xorq 96(%rsi), %rdx
|
||||
; ANY-NEXT: orq %r8, %rdx
|
||||
; ANY-NEXT: orq %rdi, %rdx
|
||||
; ANY-NEXT: orq %rax, %rdx
|
||||
; ANY-NEXT: orq %r9, %rdx
|
||||
; ANY-NEXT: xorl %eax, %eax
|
||||
; ANY-NEXT: orq %rcx, %rdx
|
||||
; ANY-NEXT: sete %al
|
||||
; ANY-NEXT: retq
|
||||
; NO512-LABEL: eq_i512_pair:
|
||||
; NO512: # %bb.0:
|
||||
; NO512-NEXT: movq 32(%rdi), %r8
|
||||
; NO512-NEXT: movq 48(%rdi), %r9
|
||||
; NO512-NEXT: movq 40(%rdi), %rdx
|
||||
; NO512-NEXT: movq 56(%rdi), %rcx
|
||||
; NO512-NEXT: xorq 56(%rsi), %rcx
|
||||
; NO512-NEXT: movq 120(%rdi), %rax
|
||||
; NO512-NEXT: xorq 120(%rsi), %rax
|
||||
; NO512-NEXT: orq %rcx, %rax
|
||||
; NO512-NEXT: movq 88(%rdi), %rcx
|
||||
; NO512-NEXT: xorq 88(%rsi), %rcx
|
||||
; NO512-NEXT: orq %rcx, %rax
|
||||
; NO512-NEXT: movq 24(%rdi), %rcx
|
||||
; NO512-NEXT: xorq 24(%rsi), %rcx
|
||||
; NO512-NEXT: xorq 40(%rsi), %rdx
|
||||
; NO512-NEXT: orq %rcx, %rax
|
||||
; NO512-NEXT: movq 104(%rdi), %rcx
|
||||
; NO512-NEXT: xorq 104(%rsi), %rcx
|
||||
; NO512-NEXT: orq %rdx, %rcx
|
||||
; NO512-NEXT: movq 72(%rdi), %rdx
|
||||
; NO512-NEXT: xorq 72(%rsi), %rdx
|
||||
; NO512-NEXT: orq %rdx, %rcx
|
||||
; NO512-NEXT: movq 16(%rdi), %r10
|
||||
; NO512-NEXT: orq %rax, %rcx
|
||||
; NO512-NEXT: movq 8(%rdi), %rax
|
||||
; NO512-NEXT: xorq 8(%rsi), %rax
|
||||
; NO512-NEXT: xorq 48(%rsi), %r9
|
||||
; NO512-NEXT: orq %rax, %rcx
|
||||
; NO512-NEXT: movq 112(%rdi), %rax
|
||||
; NO512-NEXT: xorq 112(%rsi), %rax
|
||||
; NO512-NEXT: orq %r9, %rax
|
||||
; NO512-NEXT: movq 80(%rdi), %rdx
|
||||
; NO512-NEXT: xorq 80(%rsi), %rdx
|
||||
; NO512-NEXT: orq %rdx, %rax
|
||||
; NO512-NEXT: movq (%rdi), %r9
|
||||
; NO512-NEXT: xorq 16(%rsi), %r10
|
||||
; NO512-NEXT: xorq (%rsi), %r9
|
||||
; NO512-NEXT: xorq 32(%rsi), %r8
|
||||
; NO512-NEXT: orq %r10, %rax
|
||||
; NO512-NEXT: movq 96(%rdi), %rdx
|
||||
; NO512-NEXT: movq 64(%rdi), %rdi
|
||||
; NO512-NEXT: xorq 64(%rsi), %rdi
|
||||
; NO512-NEXT: xorq 96(%rsi), %rdx
|
||||
; NO512-NEXT: orq %r8, %rdx
|
||||
; NO512-NEXT: orq %rdi, %rdx
|
||||
; NO512-NEXT: orq %rax, %rdx
|
||||
; NO512-NEXT: orq %r9, %rdx
|
||||
; NO512-NEXT: xorl %eax, %eax
|
||||
; NO512-NEXT: orq %rcx, %rdx
|
||||
; NO512-NEXT: sete %al
|
||||
; NO512-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: eq_i512_pair:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
|
||||
; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1
|
||||
; AVX512-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
|
||||
; AVX512-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
|
||||
; AVX512-NEXT: xorl %eax, %eax
|
||||
; AVX512-NEXT: kortestw %k0, %k0
|
||||
; AVX512-NEXT: setb %al
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
%a0 = load i512, i512* %a
|
||||
%b0 = load i512, i512* %b
|
||||
%xor1 = xor i512 %a0, %b0
|
||||
|
|
Loading…
Reference in New Issue