[SDAG] expand more is-power-of-2 patterns that use popcount

(ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)

Adjust the legality check to avoid poor codegen on AArch64: for this
pattern, keep the popcount only when it is legal (previously: legal or
custom). We probably only want to use popcount here when it is a single
instruction.

fixes #57225

Differential Revision: https://reviews.llvm.org/D132237
This commit is contained in:
Sanjay Patel 2022-08-23 17:45:59 -04:00
parent 7d670976db
commit f8dfbea324
2 changed files with 34 additions and 73 deletions

View File

@@ -4003,14 +4003,14 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
EVT CTVT = CTPOP.getValueType();
SDValue CTOp = CTPOP.getOperand(0);
// If this is a vector CTPOP, keep the CTPOP if it is legal.
// TODO: Should we check if CTPOP is legal(or custom) for scalars?
if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
return SDValue();
// Expand a power-of-2-or-zero comparison based on ctpop:
// (ctpop x) u< 2 -> (x & x-1) == 0
// (ctpop x) u> 1 -> (x & x-1) != 0
if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
// Keep the CTPOP if it is a legal vector op.
if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
return SDValue();
unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
return SDValue();
@@ -4029,16 +4029,14 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
}
// If ctpop is not supported, expand a power-of-2 comparison based on it.
// Expand a power-of-2 comparison based on ctpop:
// (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
// (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
// For scalars, keep CTPOP if it is legal or custom.
if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
// Keep the CTPOP if it is legal.
if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
return SDValue();
// This is based on X86's custom lowering for CTPOP which produces more
// instructions than the expansion here.
// (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
// (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
SDValue Zero = DAG.getConstant(0, dl, CTVT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
assert(CTVT.isInteger());

View File

@@ -168,30 +168,18 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
; CHECK-LABEL: ctpop_eq_one:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: cnt.8b v0, v0
; CHECK-NEXT: uaddlv.8b h0, v0
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: cmp x8, #1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: sub x8, x0, #1
; CHECK-NEXT: tst x0, x8
; CHECK-NEXT: ccmp x0, #0, #4, eq
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
;
; CHECK-NONEON-LABEL: ctpop_eq_one:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr x9, x0, #1
; CHECK-NONEON-NEXT: mov x8, #72340172838076673
; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
; CHECK-NONEON-NEXT: sub x9, x0, x9
; CHECK-NONEON-NEXT: lsr x10, x9, #2
; CHECK-NONEON-NEXT: and x9, x9, #0x3333333333333333
; CHECK-NONEON-NEXT: and x10, x10, #0x3333333333333333
; CHECK-NONEON-NEXT: add x9, x9, x10
; CHECK-NONEON-NEXT: add x9, x9, x9, lsr #4
; CHECK-NONEON-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
; CHECK-NONEON-NEXT: mul x8, x9, x8
; CHECK-NONEON-NEXT: lsr x8, x8, #56
; CHECK-NONEON-NEXT: cmp x8, #1
; CHECK-NONEON-NEXT: cset w0, eq
; CHECK-NONEON-NEXT: sub x8, x0, #1
; CHECK-NONEON-NEXT: tst x0, x8
; CHECK-NONEON-NEXT: ccmp x0, #0, #4, eq
; CHECK-NONEON-NEXT: cset w0, ne
; CHECK-NONEON-NEXT: ret
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%cmp = icmp eq i64 %count, 1
@@ -202,30 +190,18 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
; CHECK-LABEL: ctpop_ne_one:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: cnt.8b v0, v0
; CHECK-NEXT: uaddlv.8b h0, v0
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: cmp x8, #1
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: sub x8, x0, #1
; CHECK-NEXT: tst x0, x8
; CHECK-NEXT: ccmp x0, #0, #4, eq
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
;
; CHECK-NONEON-LABEL: ctpop_ne_one:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr x9, x0, #1
; CHECK-NONEON-NEXT: mov x8, #72340172838076673
; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
; CHECK-NONEON-NEXT: sub x9, x0, x9
; CHECK-NONEON-NEXT: lsr x10, x9, #2
; CHECK-NONEON-NEXT: and x9, x9, #0x3333333333333333
; CHECK-NONEON-NEXT: and x10, x10, #0x3333333333333333
; CHECK-NONEON-NEXT: add x9, x9, x10
; CHECK-NONEON-NEXT: add x9, x9, x9, lsr #4
; CHECK-NONEON-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
; CHECK-NONEON-NEXT: mul x8, x9, x8
; CHECK-NONEON-NEXT: lsr x8, x8, #56
; CHECK-NONEON-NEXT: cmp x8, #1
; CHECK-NONEON-NEXT: cset w0, ne
; CHECK-NONEON-NEXT: sub x8, x0, #1
; CHECK-NONEON-NEXT: tst x0, x8
; CHECK-NONEON-NEXT: ccmp x0, #0, #4, eq
; CHECK-NONEON-NEXT: cset w0, eq
; CHECK-NONEON-NEXT: ret
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%cmp = icmp ne i64 %count, 1
@@ -236,31 +212,18 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
; CHECK-LABEL: ctpop32_ne_one:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: cnt.8b v0, v0
; CHECK-NEXT: uaddlv.8b h0, v0
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: cmp w8, #1
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: sub w8, w0, #1
; CHECK-NEXT: tst w0, w8
; CHECK-NEXT: ccmp w0, #0, #4, eq
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
;
; CHECK-NONEON-LABEL: ctpop32_ne_one:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr w9, w0, #1
; CHECK-NONEON-NEXT: mov w8, #16843009
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
; CHECK-NONEON-NEXT: sub w9, w0, w9
; CHECK-NONEON-NEXT: lsr w10, w9, #2
; CHECK-NONEON-NEXT: and w9, w9, #0x33333333
; CHECK-NONEON-NEXT: and w10, w10, #0x33333333
; CHECK-NONEON-NEXT: add w9, w9, w10
; CHECK-NONEON-NEXT: add w9, w9, w9, lsr #4
; CHECK-NONEON-NEXT: and w9, w9, #0xf0f0f0f
; CHECK-NONEON-NEXT: mul w8, w9, w8
; CHECK-NONEON-NEXT: lsr w8, w8, #24
; CHECK-NONEON-NEXT: cmp w8, #1
; CHECK-NONEON-NEXT: cset w0, ne
; CHECK-NONEON-NEXT: sub w8, w0, #1
; CHECK-NONEON-NEXT: tst w0, w8
; CHECK-NONEON-NEXT: ccmp w0, #0, #4, eq
; CHECK-NONEON-NEXT: cset w0, eq
; CHECK-NONEON-NEXT: ret
%count = tail call i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp ne i32 %count, 1