From 245036950a7a63f77aa1f06f46dfe2fbb2cafc0f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 9 Apr 2021 15:51:53 +0100 Subject: [PATCH] [X86][BMI] Fold cmpeq/ne(or(X,Y),X) --> cmpeq/ne(and(~X,Y),0) (PR44136) I've initially just enabled this for BMI which has the ANDN instruction for i32/i64 - the i16/i8 cases give an idea of what we'd get when we enable it in all cases (I'll do this as a later commit). Additionally, the i16/i8 cases could be freely promoted to i32 (as the args are already zeroext) and we could then make use of ANDN + the free cmp0 there as well - this has come up in PR48768 and PR49028 so I'm going to look at this soon. https://alive2.llvm.org/ce/z/QVWHP_ https://alive2.llvm.org/ce/z/pLngT- Vector cases do not appear to benefit from this as we end up having to generate the zero vector as well - this is one of the reasons I didn't try to tie this into hasAndNot/hasAndNotCompare. Differential Revision: https://reviews.llvm.org/D100177 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 20 +++++++ llvm/test/CodeGen/X86/setcc-logic.ll | 80 ++++++++++++++++--------- 2 files changed, 73 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7c0ec182865c..12f97372859b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -48162,6 +48162,26 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(X86ISD::SETCC, DL, MVT::i8, X86CC, V)); } + + // cmpeq(or(X,Y),X) --> cmpeq(and(~X,Y),0) + // cmpne(or(X,Y),X) --> cmpne(and(~X,Y),0) + if (OpVT.isScalarInteger() && Subtarget.hasBMI()) { + auto MatchOrCmpEq = [&](SDValue N0, SDValue N1) { + if (N0.getOpcode() == ISD::OR && N0->hasOneUse()) { + if (N0.getOperand(0) == N1) + return DAG.getNode(ISD::AND, DL, OpVT, DAG.getNOT(DL, N1, OpVT), + N0.getOperand(1)); + if (N0.getOperand(1) == N1) + return DAG.getNode(ISD::AND, DL, 
OpVT, DAG.getNOT(DL, N1, OpVT), + N0.getOperand(0)); + } + return SDValue(); + }; + if (SDValue AndN = MatchOrCmpEq(LHS, RHS)) + return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); + if (SDValue AndN = MatchOrCmpEq(RHS, LHS)) + return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); + } } if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll index 8fc0da5ae406..b32d625ab619 100644 --- a/llvm/test/CodeGen/X86/setcc-logic.ll +++ b/llvm/test/CodeGen/X86/setcc-logic.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,NOBMI +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BMI define zeroext i1 @all_bits_clear(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: all_bits_clear: @@ -607,51 +607,77 @@ define i1 @and_icmps_const_1bit_diff_common_op(i32 %x, i32 %y) { ret i1 %r } -; TODO: PR44136 - fold cmpeq(or(X,Y),X) --> cmpeq(and(~X,Y),0) +; PR44136 - fold cmpeq(or(X,Y),X) --> cmpeq(and(~X,Y),0) define i1 @or_cmp_eq_i64(i64 %x, i64 %y) { -; CHECK-LABEL: or_cmp_eq_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: orq %rdi, %rsi -; CHECK-NEXT: cmpq %rdi, %rsi -; CHECK-NEXT: sete %al -; CHECK-NEXT: retq +; NOBMI-LABEL: or_cmp_eq_i64: +; NOBMI: # %bb.0: +; NOBMI-NEXT: orq %rdi, %rsi +; NOBMI-NEXT: cmpq %rdi, %rsi +; NOBMI-NEXT: sete %al +; NOBMI-NEXT: retq +; +; BMI-LABEL: or_cmp_eq_i64: +; BMI: # %bb.0: +; BMI-NEXT: andnq %rsi, %rdi, %rax +; BMI-NEXT: sete %al +; BMI-NEXT: retq %o = or i64 %x, %y %c = icmp eq i64 %o, %x ret i1 %c } define i1 @or_cmp_ne_i32(i32 %x, i32 %y) { -; CHECK-LABEL: or_cmp_ne_i32: -; CHECK: # %bb.0: -; CHECK-NEXT: orl %esi, %edi -; 
CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: setne %al -; CHECK-NEXT: retq +; NOBMI-LABEL: or_cmp_ne_i32: +; NOBMI: # %bb.0: +; NOBMI-NEXT: orl %esi, %edi +; NOBMI-NEXT: cmpl %esi, %edi +; NOBMI-NEXT: setne %al +; NOBMI-NEXT: retq +; +; BMI-LABEL: or_cmp_ne_i32: +; BMI: # %bb.0: +; BMI-NEXT: andnl %edi, %esi, %eax +; BMI-NEXT: setne %al +; BMI-NEXT: retq %o = or i32 %x, %y %c = icmp ne i32 %o, %y ret i1 %c } define i1 @or_cmp_eq_i16(i16 zeroext %x, i16 zeroext %y) { -; CHECK-LABEL: or_cmp_eq_i16: -; CHECK: # %bb.0: -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: cmpw %si, %di -; CHECK-NEXT: sete %al -; CHECK-NEXT: retq +; NOBMI-LABEL: or_cmp_eq_i16: +; NOBMI: # %bb.0: +; NOBMI-NEXT: orl %edi, %esi +; NOBMI-NEXT: cmpw %si, %di +; NOBMI-NEXT: sete %al +; NOBMI-NEXT: retq +; +; BMI-LABEL: or_cmp_eq_i16: +; BMI: # %bb.0: +; BMI-NEXT: notl %edi +; BMI-NEXT: testw %si, %di +; BMI-NEXT: sete %al +; BMI-NEXT: retq %o = or i16 %x, %y %c = icmp eq i16 %x, %o ret i1 %c } define i1 @or_cmp_ne_i8(i8 zeroext %x, i8 zeroext %y) { -; CHECK-LABEL: or_cmp_ne_i8: -; CHECK: # %bb.0: -; CHECK-NEXT: orl %esi, %edi -; CHECK-NEXT: cmpb %dil, %sil -; CHECK-NEXT: setne %al -; CHECK-NEXT: retq +; NOBMI-LABEL: or_cmp_ne_i8: +; NOBMI: # %bb.0: +; NOBMI-NEXT: orl %esi, %edi +; NOBMI-NEXT: cmpb %dil, %sil +; NOBMI-NEXT: setne %al +; NOBMI-NEXT: retq +; +; BMI-LABEL: or_cmp_ne_i8: +; BMI: # %bb.0: +; BMI-NEXT: notb %sil +; BMI-NEXT: testb %dil, %sil +; BMI-NEXT: setne %al +; BMI-NEXT: retq %o = or i8 %x, %y %c = icmp ne i8 %y, %o ret i1 %c