[RISCV][WebAssembly][TargetLowering] Allow expandCTLZ/expandCTTZ to rely on CTPOP expansion for vectors.

Our fallback expansion for CTLZ/CTTZ relies on CTPOP. If CTPOP
isn't legal or custom for a vector type we would scalarize the
CTLZ/CTTZ. This differs from CTPOP itself, which would use a
vector expansion.

This patch teaches expandCTLZ/CTTZ to rely on the vector CTPOP
expansion instead of scalarizing. To do this I had to add additional
checks to make sure the operations used by CTPOP expansions are all
supported. Some of the operations were already needed for the CTLZ/CTTZ
expansion.

This is a huge improvement for RISCV, which doesn't have a scalar
ctlz or cttz in the base ISA.

For WebAssembly, I've added Custom lowering to keep the scalarizing
behavior. I've also extended the scalarizing to CTPOP.

Differential Revision: https://reviews.llvm.org/D111919
This commit is contained in:
Craig Topper 2021-10-19 16:10:02 -07:00
parent 60e19f6752
commit fe1f0de003
5 changed files with 2259 additions and 19686 deletions

View File

@ -6980,6 +6980,17 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
// Returns true if the target provides every vector bit operation that is
// checked before expanding CTPOP for the vector type VT; vector types are
// only expanded when this holds.
//
// \param TLI  Target lowering object queried for operation support.
// \param VT   Vector value type to check (asserted to be a vector).
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
  assert(VT.isVector() && "Expected vector type");
  const unsigned EltBits = VT.getScalarSizeInBits();

  // ADD, SUB and SRL must each be Legal or Custom for VT.
  if (!TLI.isOperationLegalOrCustom(ISD::ADD, VT))
    return false;
  if (!TLI.isOperationLegalOrCustom(ISD::SUB, VT))
    return false;
  if (!TLI.isOperationLegalOrCustom(ISD::SRL, VT))
    return false;

  // MUL is only required when the elements are not 8 bits wide.
  // NOTE(review): presumably the 8-bit expansion never emits the multiply
  // step -- confirm against expandCTPOP.
  if (EltBits != 8 && !TLI.isOperationLegalOrCustom(ISD::MUL, VT))
    return false;

  // AND is additionally allowed to be Promote.
  return TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
}
bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
SDLoc dl(Node);
@ -6994,11 +7005,7 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
return false;
// Only expand vector types if we have the appropriate vector bit operations.
if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
(Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
return false;
// This is the "best" algorithm from
@ -7068,8 +7075,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
}
// Only expand vector types if we have the appropriate vector bit operations.
// This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
!isOperationLegalOrCustom(ISD::CTPOP, VT) ||
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
!canExpandVectorCTPOP(*this, VT)) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
return false;
@ -7120,9 +7129,11 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
}
// Only expand vector types if we have the appropriate vector bit operations.
// This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
!isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
!isOperationLegalOrCustom(ISD::CTLZ, VT) &&
!canExpandVectorCTPOP(*this, VT)) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))

View File

@ -215,8 +215,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::SELECT_CC, T, Expand);
// Expand integer operations supported for scalars but not SIMD
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
for (auto Op :
{ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
setOperationAction(Op, T, Expand);
@ -225,8 +225,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(Op, T, Legal);
// And we have popcnt for i8x16
// And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
// Custom lower bit counting operations for other types to scalarize them.
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
setOperationAction(Op, T, Custom);
// Expand float operations supported for scalars but not SIMD
for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
@ -1405,6 +1412,10 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
return LowerLoad(Op, DAG);
case ISD::STORE:
return LowerStore(Op, DAG);
case ISD::CTPOP:
case ISD::CTLZ:
case ISD::CTTZ:
return DAG.UnrollVectorOp(Op.getNode());
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -120,8 +120,7 @@ define <8 x i16> @cttz_v8i16_undef(<8 x i16> %x) {
}
; CHECK-LABEL: ctpop_v8i16:
; Note: expansion does not use i32.popcnt
; CHECK: v128.and
; CHECK: i32.popcnt
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
define <8 x i16> @ctpop_v8i16(<8 x i16> %x) {
%v = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %x)
@ -209,8 +208,7 @@ define <4 x i32> @cttz_v4i32_undef(<4 x i32> %x) {
}
; CHECK-LABEL: ctpop_v4i32:
; Note: expansion does not use i32.popcnt
; CHECK: v128.and
; CHECK: i32.popcnt
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
define <4 x i32> @ctpop_v4i32(<4 x i32> %x) {
%v = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
@ -298,8 +296,7 @@ define <2 x i64> @cttz_v2i64_undef(<2 x i64> %x) {
}
; CHECK-LABEL: ctpop_v2i64:
; Note: expansion does not use i64.popcnt
; CHECK: v128.and
; CHECK: i64.popcnt
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
define <2 x i64> @ctpop_v2i64(<2 x i64> %x) {
%v = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)