From ac522f87002ffc20d377e284080c9fa7f63216fc Mon Sep 17 00:00:00 2001 From: Layton Kifer Date: Sun, 6 Dec 2020 11:50:42 -0500 Subject: [PATCH] [DAGCombiner] Fold (sext (not i1 x)) -> (add (zext i1 x), -1) Move fold of (sext (not i1 x)) -> (add (zext i1 x), -1) from X86 to DAGCombiner to improve codegen on other targets. Differential Revision: https://reviews.llvm.org/D91589 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 ++++++++++ llvm/lib/Target/X86/X86ISelLowering.cpp | 11 --------- llvm/test/CodeGen/AArch64/select_const.ll | 7 +++--- llvm/test/CodeGen/ARM/select_const.ll | 24 +++++++------------ llvm/test/CodeGen/PowerPC/select_const.ll | 6 ++--- llvm/test/CodeGen/RISCV/sext-zext-trunc.ll | 21 +++++++--------- llvm/test/CodeGen/SystemZ/sext-zext.ll | 7 +++--- llvm/test/CodeGen/X86/pr44140.ll | 4 ++-- 8 files changed, 41 insertions(+), 52 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b1a3d849ed99..c40c2502f536 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10663,6 +10663,19 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } + // fold sext (not i1 X) -> add (zext i1 X), -1 + // TODO: This could be extended to handle bool vectors. + if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() && + (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) && + TLI.isOperationLegal(ISD::ADD, VT)))) { + // If we can eliminate the 'not', the sext form should be better + if (SDValue NewXor = visitXOR(N0.getNode())) + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor); + + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); + } + return SDValue(); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bfd80690347d..690eb39fa0d4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -46882,7 +46882,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - EVT InVT = N0.getValueType(); SDLoc DL(N); // (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry)) @@ -46911,16 +46910,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) return V; - if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR && - isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) { - // Invert and sign-extend a boolean is the same as zero-extend and subtract - // 1 because 0 becomes -1 and 1 becomes 0. The subtract is efficiently - // lowered with an LEA or a DEC. This is the same as: select Bool, 0, -1. - // sext (xor Bool, -1) --> sub (zext Bool), 1 - SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); - return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT)); - } - if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) return V; diff --git a/llvm/test/CodeGen/AArch64/select_const.ll b/llvm/test/CodeGen/AArch64/select_const.ll index affb8150ff85..945e7cdc35ad 100644 --- a/llvm/test/CodeGen/AArch64/select_const.ll +++ b/llvm/test/CodeGen/AArch64/select_const.ll @@ -68,8 +68,8 @@ define i32 @select_1_or_0_signext(i1 signext %cond) { define i32 @select_0_or_neg1(i1 %cond) { ; CHECK-LABEL: select_0_or_neg1: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w0 -; CHECK-NEXT: sbfx w0, w8, #0, #1 +; CHECK-NEXT: and w8, w0, #0x1 +; CHECK-NEXT: sub w0, w8, #1 // =1 ; CHECK-NEXT: ret %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -78,8 +78,7 @@ define i32 @select_0_or_neg1(i1 %cond) { define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_0_or_neg1_zeroext: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w0 -; CHECK-NEXT: sbfx w0, w8, #0, #1 +; CHECK-NEXT: sub w0, w0, #1 // =1 ; CHECK-NEXT: ret %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel diff --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll index 500426074736..03f538ea5313 100644 --- a/llvm/test/CodeGen/ARM/select_const.ll +++ b/llvm/test/CodeGen/ARM/select_const.ll @@ -137,23 +137,21 @@ define i32 @select_1_or_0_signext(i1 signext %cond) { define i32 @select_0_or_neg1(i1 %cond) { ; ARM-LABEL: select_0_or_neg1: ; ARM: @ %bb.0: -; ARM-NEXT: mov r1, #1 -; ARM-NEXT: bic r0, r1, r0 -; ARM-NEXT: rsb r0, r0, #0 +; ARM-NEXT: and r0, r0, #1 +; ARM-NEXT: sub r0, r0, #1 ; ARM-NEXT: mov pc, lr ; ; THUMB2-LABEL: select_0_or_neg1: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: movs r1, #1 -; THUMB2-NEXT: bic.w r0, r1, r0 -; THUMB2-NEXT: rsbs r0, r0, #0 +; THUMB2-NEXT: and r0, r0, #1 +; THUMB2-NEXT: subs r0, #1 ; THUMB2-NEXT: bx lr ; ; THUMB-LABEL: select_0_or_neg1: ; THUMB: @ %bb.0: ; THUMB-NEXT: movs r1, #1 -; THUMB-NEXT: bics r1, r0 -; THUMB-NEXT: rsbs r0, r1, #0 +; THUMB-NEXT: ands r1, r0 +; THUMB-NEXT: subs r0, r1, #1 ; THUMB-NEXT: bx lr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -162,21 +160,17 @@ define i32 @select_0_or_neg1(i1 %cond) { define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { ; ARM-LABEL: select_0_or_neg1_zeroext: ; ARM: @ %bb.0: -; ARM-NEXT: eor r0, r0, #1 -; ARM-NEXT: rsb r0, r0, #0 +; ARM-NEXT: sub r0, r0, #1 ; ARM-NEXT: mov pc, lr ; ; THUMB2-LABEL: select_0_or_neg1_zeroext: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: eor r0, r0, #1 -; THUMB2-NEXT: rsbs r0, r0, #0 +; THUMB2-NEXT: subs r0, #1 ; THUMB2-NEXT: bx lr ; ; THUMB-LABEL: select_0_or_neg1_zeroext: ; THUMB: @ %bb.0: -; THUMB-NEXT: movs r1, #1 -; THUMB-NEXT: eors r1, r0 -; THUMB-NEXT: rsbs r0, r1, #0 +; THUMB-NEXT: subs r0, r0, #1 ; THUMB-NEXT: bx lr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel diff --git a/llvm/test/CodeGen/PowerPC/select_const.ll b/llvm/test/CodeGen/PowerPC/select_const.ll index e457ded57f6a..7e8b6297ed3c 100644 --- a/llvm/test/CodeGen/PowerPC/select_const.ll +++ b/llvm/test/CodeGen/PowerPC/select_const.ll @@ -69,9 +69,8 @@ define i32 @select_1_or_0_signext(i1 signext %cond) { define i32 @select_0_or_neg1(i1 %cond) { ; ALL-LABEL: select_0_or_neg1: ; ALL: # %bb.0: -; ALL-NEXT: not 3, 3 ; ALL-NEXT: clrldi 3, 3, 63 -; ALL-NEXT: neg 3, 3 +; ALL-NEXT: addi 3, 3, -1 ; ALL-NEXT: blr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -80,8 +79,7 @@ define i32 @select_0_or_neg1(i1 %cond) { define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { ; ALL-LABEL: select_0_or_neg1_zeroext: ; ALL: # %bb.0: -; ALL-NEXT: xori 3, 3, 1 -; ALL-NEXT: neg 3, 3 +; ALL-NEXT: addi 3, 3, -1 ; ALL-NEXT: blr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll index 2c7a9b13342c..da7faa366e63 100644 --- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll +++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll @@ -437,20 +437,18 @@ define i32 @trunc_i64_to_i32(i64 %a) nounwind { ret i32 %1 } -;; TODO: fold (sext (not x)) -> (add (zext x) -1) +;; fold (sext (not x)) -> (add (zext x) -1) define i32 @sext_of_not_i32(i1 %x) { ; RV32I-LABEL: sext_of_not_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: andi a0, a0, 1 -; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sext_of_not_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: ret %xor = xor i1 %x, 1 %sext = sext i1 %xor to i32 @@ -460,24 +458,23 @@ define i32 @sext_of_not_i32(i1 %x) { define i64 @sext_of_not_i64(i1 %x) { ; RV32I-LABEL: sext_of_not_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: not a0, a0 -; RV32I-NEXT: andi a0, a0, 1 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: andi a1, a0, 1 +; RV32I-NEXT: addi a0, a1, -1 +; RV32I-NEXT: sltu a1, a0, a1 +; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sext_of_not_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: ret %xor = xor i1 %x, 1 %sext = sext i1 %xor to i64 ret i64 %sext } -;; TODO: fold (sext (not (setcc a, b, cc))) -> (sext (setcc a, b, !cc)) +;; fold (sext (not (setcc a, b, cc))) -> (sext (setcc a, b, !cc)) define i32 @sext_of_not_cmp_i32(i32 %x) { ; RV32I-LABEL: sext_of_not_cmp_i32: ; RV32I: # %bb.0: diff --git a/llvm/test/CodeGen/SystemZ/sext-zext.ll b/llvm/test/CodeGen/SystemZ/sext-zext.ll index 9e2d3bf27742..d48e4ba83588 100644 --- a/llvm/test/CodeGen/SystemZ/sext-zext.ll +++ b/llvm/test/CodeGen/SystemZ/sext-zext.ll @@ -1,20 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -;; TODO: fold (sext (not x)) -> (add (zext x) -1) +;; fold (sext (not x)) -> (add (zext x) -1) define i32 @sext_of_not(i1 %x) { ; CHECK-LABEL: sext_of_not: ; CHECK: # %bb.0: -; CHECK-NEXT: xilf %r2, 4294967295 ; CHECK-NEXT: nilf %r2, 1 -; CHECK-NEXT: lcr %r2, %r2 +; CHECK-NEXT: ahi %r2, -1 ; CHECK-NEXT: br %r14 %xor = xor i1 %x, 1 %sext = sext i1 %xor to i32 ret i32 %sext } -;; TODO: fold (sext (not (setcc a, b, cc))) -> (sext (setcc a, b, !cc)) +;; fold (sext (not (setcc a, b, cc))) -> (sext (setcc a, b, !cc)) define i32 @sext_of_not_cmp(i32 %x) { ; CHECK-LABEL: sext_of_not_cmp: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/X86/pr44140.ll b/llvm/test/CodeGen/X86/pr44140.ll index 941f45d2d99a..9e623ae5bceb 100644 --- a/llvm/test/CodeGen/X86/pr44140.ll +++ b/llvm/test/CodeGen/X86/pr44140.ll @@ -49,8 +49,8 @@ define i32 @main() { ; CHECK-NEXT: movabsq $1010101010101010101, %rcx # imm = 0xE04998456557EB5 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpq %rcx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: sete %al -; CHECK-NEXT: decl %eax +; CHECK-NEXT: setne %al +; CHECK-NEXT: negl %eax ; CHECK-NEXT: addq $584, %rsp # imm = 0x248 ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq